src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  * \author Jürgen Spitzmüller
  11  *
  12  * Full author contact details are available in file CREDITS.
  13  */
  14
  15 #include <config.h>
  16
  17 #include "BiblioInfo.h"
  18 #include "Buffer.h"
  19 #include "BufferParams.h"
  20 #include "buffer_funcs.h"
  21 #include "Citation.h"
  22 #include "Encoding.h"
  23 #include "InsetIterator.h"
  24 #include "Language.h"
  25 #include "output_xhtml.h"
  26 #include "Paragraph.h"
  27 #include "TextClass.h"
  28 #include "TocBackend.h"
  29
  30 #include "support/convert.h"
  31 #include "support/debug.h"
  32 #include "support/docstream.h"
  33 #include "support/gettext.h"
  34 #include "support/lassert.h"
  35 #include "support/lstrings.h"
  36 #include "support/regex.h"
  37 #include "support/textutils.h"
  38
  39 #include <map>
  40 #include <set>
  41
  42 using namespace std;
  43 using namespace lyx::support;
  44
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 // Remove placeholders from names
  51 docstring renormalize(docstring const & input)
  52 {
  53         docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
  54         return subst(res, from_ascii("$$comma!"), from_ascii(","));
  55 }
  56
  57
  58 // Split the surname into prefix ("von-part") and family name
  59 pair<docstring, docstring> parseSurname(docstring const & sname)
  60 {
  61         // Split the surname into its tokens
  62         vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
  63         if (pieces.size() < 2)
  64                 return make_pair(docstring(), sname);
  65
  66         // Now we look for pieces that begin with a lower case letter.
  67         // All except for the very last token constitute the "von-part".
  68         docstring prefix;
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator const en = pieces.end();
  71         bool first = true;
  72         for (; it != en; ++it) {
  73                 if ((*it).empty())
  74                         continue;
  75                 // If this is the last piece, then what we now have is
  76                 // the family name, notwithstanding the casing.
  77                 if (it + 1 == en)
  78                         break;
  79                 char_type const c = (*it)[0];
  80                 // If the piece starts with a upper case char, we assume
  81                 // this is part of the surname.
  82                 if (!isLower(c))
  83                         break;
  84                 // Nothing of the former, so add this piece to the prename
  85                 if (!first)
  86                         prefix += " ";
  87                 else
  88                         first = false;
  89                 prefix += *it;
  90         }
  91
  92         // Reconstruct the family name.
  93         // Note that if we left the loop with because it + 1 == en,
  94         // then this will still do the right thing, i.e., make surname
  95         // just be the last piece.
  96         docstring surname;
  97         first = true;
  98         for (; it != en; ++it) {
  99                 if (!first)
 100                         surname += " ";
 101                 else
 102                         first = false;
 103                 surname += *it;
 104         }
 105         return make_pair(prefix, surname);
 106 }
 107
 108
 109 struct name_parts {
 110         docstring surname;
 111         docstring prename;
 112         docstring suffix;
 113         docstring prefix;
 114 };
 115
 116
 117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
 118 name_parts nameParts(docstring const & iname)
 119 {
 120         name_parts res;
 121         if (iname.empty())
 122                 return res;
 123
 124         // First we check for goupings (via {...}) and replace blanks and
 125         // commas inside groups with temporary placeholders
 126         docstring name;
 127         int gl = 0;
 128         docstring::const_iterator p = iname.begin();
 129         while (p != iname.end()) {
 130                 // count grouping level
 131                 if (*p == '{')
 132                         ++gl;
 133                 else if (*p == '}')
 134                         --gl;
 135                 // generate string with probable placeholders
 136                 if (*p == ' ' && gl > 0)
 137                         name += from_ascii("$$space!");
 138                 else if (*p == ',' && gl > 0)
 139                         name += from_ascii("$$comma!");
 140                 else
 141                         name += *p;
 142                 ++p;
 143         }
 144
 145         // Now we look for a comma, and take the last name to be everything
 146         // preceding the right-most one, so that we also get the name suffix
 147         // (aka "jr" part).
 148         vector<docstring> pieces = getVectorFromString(name);
 149         if (pieces.size() > 1) {
 150                 // Whether we have a name suffix or not, the prename is
 151                 // always last item
 152                 res.prename = renormalize(pieces.back());
 153                 // The family name, conversely, is always the first item.
 154                 // However, it might contain a prefix (aka "von" part)
 155                 docstring const sname = pieces.front();
 156                 res.prefix = renormalize(parseSurname(sname).first);
 157                 res.surname = renormalize(parseSurname(sname).second);
 158                 // If we have three pieces (the maximum allowed by BibTeX),
 159                 // the second one is the name suffix.
 160                 if (pieces.size() > 2)
 161                         res.suffix = renormalize(pieces.at(1));
 162                 return res;
 163         }
 164
 165         // OK, so now we want to look for the last name.
 166         // Split on spaces, to get various tokens.
 167         pieces = getVectorFromString(name, from_ascii(" "));
 168         // No space: Only a family name given
 169         if (pieces.size() < 2) {
 170                 res.surname = renormalize(pieces.back());
 171                 return res;
 172         }
 173         // If we get two pieces, assume "prename surname"
 174         if (pieces.size() == 2) {
 175                 res.prename = renormalize(pieces.front());
 176                 res.surname = renormalize(pieces.back());
 177                 return res;
 178         }
 179
 180         // More than 3 pieces: A name prefix (aka "von" part) might be included.
 181         // We look for the first piece that begins with a lower case letter
 182         // (which is the name prefix, if it is not the last token) or the last token.
 183         docstring prename;
 184         vector<docstring>::const_iterator it = pieces.begin();
 185         vector<docstring>::const_iterator const en = pieces.end();
 186         bool first = true;
 187         for (; it != en; ++it) {
 188                 if ((*it).empty())
 189                         continue;
 190                 char_type const c = (*it)[0];
 191                 // If the piece starts with a lower case char, we assume
 192                 // this is the name prefix and thus prename is complete.
 193                 if (isLower(c))
 194                         break;
 195                 // Same if this is the last piece, which is always the surname.
 196                 if (it + 1 == en)
 197                         break;
 198                 // Nothing of the former, so add this piece to the prename
 199                 if (!first)
 200                         prename += " ";
 201                 else
 202                         first = false;
 203                 prename += *it;
 204         }
 205
 206         // Now reconstruct the family name and strip the prefix.
 207         // Note that if we left the loop because it + 1 == en,
 208         // then this will still do the right thing, i.e., make surname
 209         // just be the last piece.
 210         docstring surname;
 211         first = true;
 212         for (; it != en; ++it) {
 213                 if (!first)
 214                         surname += " ";
 215                 else
 216                         first = false;
 217                 surname += *it;
 218         }
 219         res.prename = renormalize(prename);
 220         res.prefix = renormalize(parseSurname(surname).first);
 221         res.surname = renormalize(parseSurname(surname).second);
 222         return res;
 223 }
 224
 225
 226 docstring constructName(docstring const & name, string const scheme)
 227 {
 228         // re-constructs a name from name parts according
 229         // to a given scheme
 230         docstring const prename = nameParts(name).prename;
 231         docstring const surname = nameParts(name).surname;
 232         docstring const prefix = nameParts(name).prefix;
 233         docstring const suffix = nameParts(name).suffix;
 234         string res = scheme;
 235         static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 236         static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 237         static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 238         smatch sub;
 239         if (regex_match(scheme, sub, reg1)) {
 240                 res = sub.str(1);
 241                 if (!prename.empty())
 242                         res += sub.str(3);
 243                 res += sub.str(5);
 244         }
 245         if (regex_match(res, sub, reg2)) {
 246                 res = sub.str(1);
 247                 if (!suffix.empty())
 248                         res += sub.str(3);
 249                 res += sub.str(5);
 250         }
 251         if (regex_match(res, sub, reg3)) {
 252                 res = sub.str(1);
 253                 if (!prefix.empty())
 254                         res += sub.str(3);
 255                 res += sub.str(5);
 256         }
 257         docstring result = from_ascii(res);
 258         result = subst(result, from_ascii("%prename%"), prename);
 259         result = subst(result, from_ascii("%surname%"), surname);
 260         result = subst(result, from_ascii("%prefix%"), prefix);
 261         result = subst(result, from_ascii("%suffix%"), suffix);
 262         return result;
 263 }
 264
 265
 266 vector<docstring> const getAuthors(docstring const & author)
 267 {
 268         // We check for goupings (via {...}) and only consider " and "
 269         // outside groups as author separator. This is to account
 270         // for cases such as {{Barnes and Noble, Inc.}}, which
 271         // need to be treated as one single family name.
 272         // We use temporary placeholders in order to differentiate the
 273         // diverse " and " cases.
 274
 275         // First, we temporarily replace all ampersands. It is rather unusual
 276         // in author names, but can happen (consider cases such as "C \& A Corp.").
 277         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
 278         // Then, we temporarily make all " and " strings to ampersands in order
 279         // to handle them later on a per-char level.
 280         iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
 281         // Now we traverse through the string and replace the "&" by the proper
 282         // output in- and outside groups
 283         docstring name;
 284         int gl = 0;
 285         docstring::const_iterator p = iname.begin();
 286         while (p != iname.end()) {
 287                 // count grouping level
 288                 if (*p == '{')
 289                         ++gl;
 290                 else if (*p == '}')
 291                         --gl;
 292                 // generate string with probable placeholders
 293                 if (*p == '&') {
 294                         if (gl > 0)
 295                                 // Inside groups, we output "and"
 296                                 name += from_ascii("and");
 297                         else
 298                                 // Outside groups, we output a separator
 299                                 name += from_ascii("$$namesep!");
 300                 }
 301                 else
 302                         name += *p;
 303                 ++p;
 304         }
 305
 306         // re-insert the literal ampersands
 307         name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
 308
 309         // Now construct the actual vector
 310         return getVectorFromString(name, from_ascii(" $$namesep! "));
 311 }
 312
 313
 314 bool multipleAuthors(docstring const author)
 315 {
 316         return getAuthors(author).size() > 1;
 317 }
 318
 319
 320 // converts a string containing LaTeX commands into unicode
 321 // for display.
 322 docstring convertLaTeXCommands(docstring const & str)
 323 {
 324         docstring val = str;
 325         docstring ret;
 326
 327         bool scanning_cmd = false;
 328         bool scanning_math = false;
 329         bool escaped = false; // used to catch \$, etc.
 330         while (!val.empty()) {
 331                 char_type const ch = val[0];
 332
 333                 // if we're scanning math, we output everything until we
 334                 // find an unescaped $, at which point we break out.
 335                 if (scanning_math) {
 336                         if (escaped)
 337                                 escaped = false;
 338                         else if (ch == '\\')
 339                                 escaped = true;
 340                         else if (ch == '$')
 341                                 scanning_math = false;
 342                         ret += ch;
 343                         val = val.substr(1);
 344                         continue;
 345                 }
 346
 347                 // if we're scanning a command name, then we just
 348                 // discard characters until we hit something that
 349                 // isn't alpha.
 350                 if (scanning_cmd) {
 351                         if (isAlphaASCII(ch)) {
 352                                 val = val.substr(1);
 353                                 escaped = false;
 354                                 continue;
 355                         }
 356                         // so we're done with this command.
 357                         // now we fall through and check this character.
 358                         scanning_cmd = false;
 359                 }
 360
 361                 // was the last character a \? If so, then this is something like:
 362                 // \\ or \$, so we'll just output it. That's probably not always right...
 363                 if (escaped) {
 364                         // exception: output \, as THIN SPACE
 365                         if (ch == ',')
 366                                 ret.push_back(0x2009);
 367                         else
 368                                 ret += ch;
 369                         val = val.substr(1);
 370                         escaped = false;
 371                         continue;
 372                 }
 373
 374                 if (ch == '$') {
 375                         ret += ch;
 376                         val = val.substr(1);
 377                         scanning_math = true;
 378                         continue;
 379                 }
 380
 381                 // we just ignore braces
 382                 if (ch == '{' || ch == '}') {
 383                         val = val.substr(1);
 384                         continue;
 385                 }
 386
 387                 // we're going to check things that look like commands, so if
 388                 // this doesn't, just output it.
 389                 if (ch != '\\') {
 390                         ret += ch;
 391                         val = val.substr(1);
 392                         continue;
 393                 }
 394
 395                 // ok, could be a command of some sort
 396                 // let's see if it corresponds to some unicode
 397                 // unicodesymbols has things in the form: \"{u},
 398                 // whereas we may see things like: \"u. So we'll
 399                 // look for that and change it, if necessary.
 400                 // FIXME: This is a sort of mini-tex2lyx.
 401                 //        Use the real tex2lyx instead!
 402                 static lyx::regex const reg("^\\\\\\W\\w");
 403                 if (lyx::regex_search(to_utf8(val), reg)) {
 404                         val.insert(3, from_ascii("}"));
 405                         val.insert(2, from_ascii("{"));
 406                 }
 407                 bool termination;
 408                 docstring rem;
 409                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 410                                 Encodings::TEXT_CMD, termination, rem);
 411                 if (!cnvtd.empty()) {
 412                         // it did, so we'll take that bit and proceed with what's left
 413                         ret += cnvtd;
 414                         val = rem;
 415                         continue;
 416                 }
 417                 // it's a command of some sort
 418                 scanning_cmd = true;
 419                 escaped = true;
 420                 val = val.substr(1);
 421         }
 422         return ret;
 423 }
 424
 425
 426 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 427 docstring processRichtext(docstring const & str, bool richtext)
 428 {
 429         docstring val = str;
 430         docstring ret;
 431
 432         bool scanning_rich = false;
 433         while (!val.empty()) {
 434                 char_type const ch = val[0];
 435                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 436                         // beginning of rich text
 437                         scanning_rich = true;
 438                         val = val.substr(2);
 439                         continue;
 440                 }
 441                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 442                         // end of rich text
 443                         scanning_rich = false;
 444                         val = val.substr(2);
 445                         continue;
 446                 }
 447                 if (richtext) {
 448                         if (scanning_rich)
 449                                 ret += ch;
 450                         else {
 451                                 // we need to escape '<' and '>'
 452                                 if (ch == '<')
 453                                         ret += "&lt;";
 454                                 else if (ch == '>')
 455                                         ret += "&gt;";
 456                                 else
 457                                         ret += ch;
 458                         }
 459                 } else if (!scanning_rich /* && !richtext */)
 460                         ret += ch;
 461                 // else the character is discarded, which will happen only if
 462                 // richtext == false and we are scanning rich text
 463                 val = val.substr(1);
 464         }
 465         return ret;
 466 }
 467
 468 } // anon namespace
 469
 470
 471 //////////////////////////////////////////////////////////////////////
 472 //
 473 // BibTeXInfo
 474 //
 475 //////////////////////////////////////////////////////////////////////
 476
 477 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 478         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 479           modifier_(0)
 480 {}
 481
 482
 483
 484 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
 485                                           bool full, bool forceshort) const
 486 {
 487         docstring author = operator[]("author");
 488         if (author.empty())
 489                 author = operator[]("editor");
 490
 491         return getAuthorList(buf, author, full, forceshort);
 492 }
 493
 494
 495 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 496                 docstring const & author, bool const full, bool const forceshort,
 497                 bool const allnames, bool const beginning) const
 498 {
 499         // Maxnames treshold depend on engine
 500         size_t maxnames = buf ?
 501                 buf->params().documentClass().max_citenames() : 2;
 502
 503         if (!is_bibtex_) {
 504                 docstring const opt = label();
 505                 if (opt.empty())
 506                         return docstring();
 507
 508                 docstring authors;
 509                 docstring const remainder = trim(split(opt, authors, '('));
 510                 if (remainder.empty())
 511                         // in this case, we didn't find a "(",
 512                         // so we don't have author (year)
 513                         return docstring();
 514                 return authors;
 515         }
 516
 517         if (author.empty())
 518                 return author;
 519
 520         // OK, we've got some names. Let's format them.
 521         // Try to split the author list
 522         vector<docstring> const authors = getAuthors(author);
 523
 524         docstring retval;
 525
 526         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 527                                                : ENGINE_TYPE_DEFAULT;
 528
 529         // These are defined in the styles
 530         string const etal =
 531                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
 532                     : " et al.";
 533         string const namesep =
 534                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
 535                    : ", ";
 536         string const lastnamesep =
 537                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
 538                     : ", and ";
 539         string const pairnamesep =
 540                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
 541                      : " and ";
 542         string firstnameform =
 543                         buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
 544                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 545         if (!beginning)
 546                 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
 547                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 548         string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
 549                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 550         if (!beginning)
 551                 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
 552                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 553         string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
 554                              : "{%prefix%[[%prefix% ]]}%surname%";
 555
 556         // Shorten the list (with et al.) if forceshort is set
 557         // and the list can actually be shortened, else if maxcitenames
 558         // is passed and full is not set.
 559         bool shorten = forceshort && authors.size() > 1;
 560         vector<docstring>::const_iterator it = authors.begin();
 561         vector<docstring>::const_iterator en = authors.end();
 562         for (size_t i = 0; it != en; ++it, ++i) {
 563                 if (i >= maxnames && !full) {
 564                         shorten = true;
 565                         break;
 566                 }
 567                 if (*it == "others") {
 568                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 569                         break;
 570                 }
 571                 if (i > 0 && i == authors.size() - 1) {
 572                         if (authors.size() == 2)
 573                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 574                         else
 575                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 576                 } else if (i > 0)
 577                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 578                 if (allnames)
 579                         retval += (i == 0) ? constructName(*it, firstnameform)
 580                                 : constructName(*it, othernameform);
 581                 else
 582                         retval += constructName(*it, citenameform);
 583         }
 584         if (shorten) {
 585                 if (allnames)
 586                         retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 587                 else
 588                         retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 589         }
 590
 591         return convertLaTeXCommands(retval);
 592 }
 593
 594
 595 docstring const BibTeXInfo::getYear() const
 596 {
 597         if (is_bibtex_) {
 598                 // first try legacy year field
 599                 docstring year = operator[]("year");
 600                 if (!year.empty())
 601                         return year;
 602                 // now try biblatex's date field
 603                 year = operator[]("date");
 604                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 605                 // We only want the years.
 606                 static regex const yreg("[-]?([\\d]{4}).*");
 607                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 608                 smatch sm;
 609                 string const date = to_utf8(year);
 610                 if (!regex_match(date, sm, yreg))
 611                         // cannot parse year.
 612                         return docstring();
 613                 year = from_ascii(sm[1]);
 614                 // check for an endyear
 615                 if (regex_match(date, sm, ereg))
 616                         year += char_type(0x2013) + from_ascii(sm[1]);
 617                 return year;
 618         }
 619
 620         docstring const opt = label();
 621         if (opt.empty())
 622                 return docstring();
 623
 624         docstring authors;
 625         docstring tmp = split(opt, authors, '(');
 626         if (tmp.empty())
 627                 // we don't have author (year)
 628                 return docstring();
 629         docstring year;
 630         tmp = split(tmp, year, ')');
 631         return year;
 632 }
 633
 634
 635 namespace {
 636
 637 docstring parseOptions(docstring const & format, string & optkey,
 638                     docstring & ifpart, docstring & elsepart);
 639
 640 // Calls parseOptions to deal with an embedded option, such as:
 641 //   {%number%[[, no.~%number%]]}
 642 // which must appear at the start of format. ifelsepart gets the
 643 // whole of the option, and we return what's left after the option.
 644 // we return format if there is an error.
 645 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 646 {
 647         LASSERT(format[0] == '{' && format[1] == '%', return format);
 648         string optkey;
 649         docstring ifpart;
 650         docstring elsepart;
 651         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 652         if (format == rest) { // parse error
 653                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 654                 return format;
 655         }
 656         LASSERT(rest.size() <= format.size(),
 657                 { ifelsepart = docstring(); return format; });
 658         ifelsepart = format.substr(0, format.size() - rest.size());
 659         return rest;
 660 }
 661
 662
 663 // Gets a "clause" from a format string, where the clause is
 664 // delimited by '[[' and ']]'. Returns what is left after the
 665 // clause is removed, and returns format if there is an error.
 666 docstring getClause(docstring const & format, docstring & clause)
 667 {
 668         docstring fmt = format;
 669         // remove '[['
 670         fmt = fmt.substr(2);
 671         // we'll remove characters from the front of fmt as we
 672         // deal with them
 673         while (!fmt.empty()) {
 674                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 675                         // that's the end
 676                         fmt = fmt.substr(2);
 677                         break;
 678                 }
 679                 // check for an embedded option
 680                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 681                         docstring part;
 682                         docstring const rest = parseEmbeddedOption(fmt, part);
 683                         if (fmt == rest) {
 684                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 685                                 return format;
 686                         }
 687                         clause += part;
 688                         fmt = rest;
 689                 } else { // it's just a normal character
 690                                 clause += fmt[0];
 691                                 fmt = fmt.substr(1);
 692                 }
 693         }
 694         return fmt;
 695 }
 696
 697
 698 // parse an options string, which must appear at the start of the
 699 // format parameter. puts the parsed bits in optkey, ifpart, and
 700 // elsepart and returns what's left after the option is removed.
 701 // if there's an error, it returns format itself.
 702 docstring parseOptions(docstring const & format, string & optkey,
 703                     docstring & ifpart, docstring & elsepart)
 704 {
 705         LASSERT(format[0] == '{' && format[1] == '%', return format);
 706         // strip '{%'
 707         docstring fmt = format.substr(2);
 708         size_t pos = fmt.find('%'); // end of key
 709         if (pos == string::npos) {
 710                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 711                 return format;
 712         }
 713         optkey = to_utf8(fmt.substr(0, pos));
 714         fmt = fmt.substr(pos + 1);
 715         // [[format]] should be next
 716         if (fmt[0] != '[' || fmt[1] != '[') {
 717                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 718                 return format;
 719         }
 720
 721         docstring curfmt = fmt;
 722         fmt = getClause(curfmt, ifpart);
 723         if (fmt == curfmt) {
 724                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 725                 return format;
 726         }
 727
 728         if (fmt[0] == '}') // we're done, no else clause
 729                 return fmt.substr(1);
 730
 731         // else part should follow
 732         if (fmt[0] != '[' || fmt[1] != '[') {
 733                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 734                 return format;
 735         }
 736
 737         curfmt = fmt;
 738         fmt = getClause(curfmt, elsepart);
 739         // we should be done
 740         if (fmt == curfmt || fmt[0] != '}') {
 741                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 742                 return format;
 743         }
 744         return fmt.substr(1);
 745 }
 746
 747
 748 } // anon namespace
 749
 750 /* FIXME
 751 Bug #9131 revealed an oddity in how we are generating citation information
 752 when more than one key is given. We end up building a longer and longer format
 753 string as we go, which we then have to re-parse, over and over and over again,
 754 rather than generating the information for the individual keys and then putting
 755 all of that together. We do that to deal with the way separators work, from what
 756 I can tell, but it still feels like a hack. Fixing this would require quite a
 757 bit of work, however.
 758 */
 759 docstring BibTeXInfo::expandFormat(docstring const & format,
 760                 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
 761                 CiteItem const & ci, bool next, bool second) const
 762 {
 763         // incorrect use of macros could put us in an infinite loop
 764         static int const max_passes = 5000;
 765         // the use of overly large keys can lead to performance problems, due
 766         // to eventual attempts to convert LaTeX macros to unicode. See bug
 767         // #8944. By default, the size is limited to 128 (in CiteItem), but
 768         // for specific purposes (such as XHTML export), it needs to be enlarged
 769         // This is perhaps not the best solution, but it will have to do for now.
 770         size_t const max_keysize = ci.max_key_size;
 771         odocstringstream ret; // return value
 772         string key;
 773         bool scanning_key = false;
 774         bool scanning_rich = false;
 775
 776         CiteEngineType const engine_type = buf.params().citeEngineType();
 777         docstring fmt = format;
 778         // we'll remove characters from the front of fmt as we
 779         // deal with them
 780         while (!fmt.empty()) {
 781                 if (counter > max_passes) {
 782                         LYXERR0("Recursion limit reached while parsing `"
 783                                 << format << "'.");
 784                         return _("ERROR!");
 785                 }
 786
 787                 char_type thischar = fmt[0];
 788                 if (thischar == '%') {
 789                         // beginning or end of key
 790                         if (scanning_key) {
 791                                 // end of key
 792                                 scanning_key = false;
 793                                 // so we replace the key with its value, which may be empty
 794                                 if (key[0] == '!') {
 795                                         // macro
 796                                         string const val =
 797                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 798                                         fmt = from_utf8(val) + fmt.substr(1);
 799                                         counter += 1;
 800                                         continue;
 801                                 } else if (key[0] == '_') {
 802                                         // a translatable bit
 803                                         string const val =
 804                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 805                                         docstring const trans =
 806                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 807                                         ret << trans;
 808                                 } else {
 809                                         docstring const val =
 810                                                 getValueForKey(key, buf, ci, xrefs, max_keysize);
 811                                         if (!scanning_rich)
 812                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 813                                         ret << val;
 814                                         if (!scanning_rich)
 815                                                 ret << from_ascii("{!</span>!}");
 816                                 }
 817                         } else {
 818                                 // beginning of key
 819                                 key.clear();
 820                                 scanning_key = true;
 821                         }
 822                 }
 823                 else if (thischar == '{') {
 824                         // beginning of option?
 825                         if (scanning_key) {
 826                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 827                                 return _("ERROR!");
 828                         }
 829                         if (fmt.size() > 1) {
 830                                 if (fmt[1] == '%') {
 831                                         // it is the beginning of an optional format
 832                                         string optkey;
 833                                         docstring ifpart;
 834                                         docstring elsepart;
 835                                         docstring const newfmt =
 836                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 837                                         if (newfmt == fmt) // parse error
 838                                                 return _("ERROR!");
 839                                         fmt = newfmt;
 840                                         docstring const val =
 841                                                 getValueForKey(optkey, buf, ci, xrefs);
 842                                         if (optkey == "next" && next)
 843                                                 ret << ifpart; // without expansion
 844                                         else if (optkey == "second" && second) {
 845                                                 int newcounter = 0;
 846                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 847                                                         ci, next);
 848                                         } else if (!val.empty()) {
 849                                                 int newcounter = 0;
 850                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 851                                                         ci, next);
 852                                         } else if (!elsepart.empty()) {
 853                                                 int newcounter = 0;
 854                                                 ret << expandFormat(elsepart, xrefs, newcounter, buf,
 855                                                         ci, next);
 856                                         }
 857                                         // fmt will have been shortened for us already
 858                                         continue;
 859                                 }
 860                                 if (fmt[1] == '!') {
 861                                         // beginning of rich text
 862                                         scanning_rich = true;
 863                                         fmt = fmt.substr(2);
 864                                         ret << from_ascii("{!");
 865                                         continue;
 866                                 }
 867                         }
 868                         // we are here if '{' was not followed by % or !.
 869                         // So it's just a character.
 870                         ret << thischar;
 871                 }
 872                 else if (scanning_rich && thischar == '!'
 873                          && fmt.size() > 1 && fmt[1] == '}') {
 874                         // end of rich text
 875                         scanning_rich = false;
 876                         fmt = fmt.substr(2);
 877                         ret << from_ascii("!}");
 878                         continue;
 879                 }
 880                 else if (scanning_key)
 881                         key += char(thischar);
 882                 else {
 883                         try {
 884                                 ret.put(thischar);
 885                         } catch (EncodingException & /* e */) {
 886                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 887                         }
 888                 }
 889                 fmt = fmt.substr(1);
 890         } // for loop
 891         if (scanning_key) {
 892                 LYXERR0("Never found end of key in `" << format << "'!");
 893                 return _("ERROR!");
 894         }
 895         if (scanning_rich) {
 896                 LYXERR0("Never found end of rich text in `" << format << "'!");
 897                 return _("ERROR!");
 898         }
 899         return ret.str();
 900 }
 901
 902
 903 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 904         Buffer const & buf, CiteItem const & ci) const
 905 {
 906         bool const richtext = ci.richtext;
 907
 908         if (!richtext && !info_.empty())
 909                 return info_;
 910         if (richtext && !info_richtext_.empty())
 911                 return info_richtext_;
 912
 913         if (!is_bibtex_) {
 914                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 915                 info_ = it->second;
 916                 return info_;
 917         }
 918
 919         CiteEngineType const engine_type = buf.params().citeEngineType();
 920         DocumentClass const & dc = buf.params().documentClass();
 921         docstring const & format =
 922                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 923         int counter = 0;
 924         info_ = expandFormat(format, xrefs, counter, buf,
 925                 ci, false, false);
 926
 927         if (info_.empty()) {
 928                 // this probably shouldn't happen
 929                 return info_;
 930         }
 931
 932         if (richtext) {
 933                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 934                 return info_richtext_;
 935         }
 936
 937         info_ = convertLaTeXCommands(processRichtext(info_, false));
 938         return info_;
 939 }
 940
 941
 942 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 943         Buffer const & buf, docstring const & format,
 944         CiteItem const & ci, bool next, bool second) const
 945 {
 946         docstring loclabel;
 947
 948         int counter = 0;
 949         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 950
 951         if (!loclabel.empty() && !next) {
 952                 loclabel = processRichtext(loclabel, ci.richtext);
 953                 loclabel = convertLaTeXCommands(loclabel);
 954         }
 955
 956         return loclabel;
 957 }
 958
 959
 960 docstring const & BibTeXInfo::operator[](docstring const & field) const
 961 {
 962         BibTeXInfo::const_iterator it = find(field);
 963         if (it != end())
 964                 return it->second;
 965         static docstring const empty_value = docstring();
 966         return empty_value;
 967 }
 968
 969
 970 docstring const & BibTeXInfo::operator[](string const & field) const
 971 {
 972         return operator[](from_ascii(field));
 973 }
 974
 975
 976 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 977         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 978 {
 979         // anything less is pointless
 980         LASSERT(maxsize >= 16, maxsize = 16);
 981         string key = oldkey;
 982         bool cleanit = false;
 983         if (prefixIs(oldkey, "clean:")) {
 984                 key = oldkey.substr(6);
 985                 cleanit = true;
 986         }
 987
 988         docstring ret = operator[](key);
 989         if (ret.empty() && !xrefs.empty()) {
 990                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 991                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 992                 for (; it != en; ++it) {
 993                         if (*it && !(**it)[key].empty()) {
 994                                 ret = (**it)[key];
 995                                 break;
 996                         }
 997                 }
 998         }
 999         if (ret.empty()) {
1000                 // some special keys
1001                 // FIXME: dialog, textbefore and textafter have nothing to do with this
1002                 if (key == "dialog" && ci.context == CiteItem::Dialog)
1003                         ret = from_ascii("x"); // any non-empty string will do
1004                 else if (key == "export" && ci.context == CiteItem::Export)
1005                         ret = from_ascii("x"); // any non-empty string will do
1006                 else if (key == "ifstar" && ci.Starred)
1007                         ret = from_ascii("x"); // any non-empty string will do
1008                 else if (key == "ifqualified" && ci.isQualified)
1009                         ret = from_ascii("x"); // any non-empty string will do
1010                 else if (key == "entrytype")
1011                         ret = entry_type_;
1012                 else if (prefixIs(key, "ifentrytype:")
1013                          && from_ascii(key.substr(12)) == entry_type_)
1014                         ret = from_ascii("x"); // any non-empty string will do
1015                 else if (key == "key")
1016                         ret = bib_key_;
1017                 else if (key == "label")
1018                         ret = label_;
1019                 else if (key == "modifier" && modifier_ != 0)
1020                         ret = modifier_;
1021                 else if (key == "numericallabel")
1022                         ret = cite_number_;
1023                 else if (prefixIs(key, "ifmultiple:")) {
1024                         // Return whether we have multiple authors
1025                         docstring const kind = operator[](from_ascii(key.substr(11)));
1026                         if (multipleAuthors(kind))
1027                                 ret = from_ascii("x"); // any non-empty string will do
1028                 }
1029                 else if (prefixIs(key, "abbrvnames:")) {
1030                         // Special key to provide abbreviated name list,
1031                         // with respect to maxcitenames. Suitable for Bibliography
1032                         // beginnings.
1033                         docstring const kind = operator[](from_ascii(key.substr(11)));
1034                         ret = getAuthorList(&buf, kind, false, false, true);
1035                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1036                                 ret[0] = uppercase(ret[0]);
1037                 } else if (prefixIs(key, "fullnames:")) {
1038                         // Return a full name list. Suitable for Bibliography
1039                         // beginnings.
1040                         docstring const kind = operator[](from_ascii(key.substr(10)));
1041                         ret = getAuthorList(&buf, kind, true, false, true);
1042                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1043                                 ret[0] = uppercase(ret[0]);
1044                 } else if (prefixIs(key, "forceabbrvnames:")) {
1045                         // Special key to provide abbreviated name lists,
1046                         // irrespective of maxcitenames. Suitable for Bibliography
1047                         // beginnings.
1048                         docstring const kind = operator[](from_ascii(key.substr(15)));
1049                         ret = getAuthorList(&buf, kind, false, true, true);
1050                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1051                                 ret[0] = uppercase(ret[0]);
1052                 } else if (prefixIs(key, "abbrvbynames:")) {
1053                         // Special key to provide abbreviated name list,
1054                         // with respect to maxcitenames. Suitable for further names inside a
1055                         // bibliography item // (such as "ed. by ...")
1056                         docstring const kind = operator[](from_ascii(key.substr(11)));
1057                         ret = getAuthorList(&buf, kind, false, false, true, false);
1058                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1059                                 ret[0] = uppercase(ret[0]);
1060                 } else if (prefixIs(key, "fullbynames:")) {
1061                         // Return a full name list. Suitable for further names inside a
1062                         // bibliography item // (such as "ed. by ...")
1063                         docstring const kind = operator[](from_ascii(key.substr(10)));
1064                         ret = getAuthorList(&buf, kind, true, false, true, false);
1065                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1066                                 ret[0] = uppercase(ret[0]);
1067                 } else if (prefixIs(key, "forceabbrvbynames:")) {
1068                         // Special key to provide abbreviated name lists,
1069                         // irrespective of maxcitenames. Suitable for further names inside a
1070                         // bibliography item // (such as "ed. by ...")
1071                         docstring const kind = operator[](from_ascii(key.substr(15)));
1072                         ret = getAuthorList(&buf, kind, false, true, true, false);
1073                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1074                                 ret[0] = uppercase(ret[0]);
1075                 } else if (key == "abbrvciteauthor") {
1076                         // Special key to provide abbreviated author or
1077                         // editor names (suitable for citation labels),
1078                         // with respect to maxcitenames.
1079                         ret = getAuthorOrEditorList(&buf, false, false);
1080                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1081                                 ret[0] = uppercase(ret[0]);
1082                 } else if (key == "fullciteauthor") {
1083                         // Return a full author or editor list (for citation labels)
1084                         ret = getAuthorOrEditorList(&buf, true, false);
1085                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1086                                 ret[0] = uppercase(ret[0]);
1087                 } else if (key == "forceabbrvciteauthor") {
1088                         // Special key to provide abbreviated author or
1089                         // editor names (suitable for citation labels),
1090                         // irrespective of maxcitenames.
1091                         ret = getAuthorOrEditorList(&buf, false, true);
1092                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1093                                 ret[0] = uppercase(ret[0]);
1094                 } else if (key == "bibentry") {
1095                         // Special key to provide the full bibliography entry: see getInfo()
1096                         CiteEngineType const engine_type = buf.params().citeEngineType();
1097                         DocumentClass const & dc = buf.params().documentClass();
1098                         docstring const & format =
1099                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1100                         int counter = 0;
1101                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1102                 } else if (key == "textbefore")
1103                         ret = ci.textBefore;
1104                 else if (key == "textafter")
1105                         ret = ci.textAfter;
1106                 else if (key == "curpretext")
1107                         ret = ci.getPretexts()[bib_key_];
1108                 else if (key == "curposttext")
1109                         ret = ci.getPosttexts()[bib_key_];
1110                 else if (key == "year")
1111                         ret = getYear();
1112         }
1113
1114         if (cleanit)
1115                 ret = html::cleanAttr(ret);
1116
1117         // make sure it is not too big
1118         support::truncateWithEllipsis(ret, maxsize);
1119         return ret;
1120 }
1121
1122
1123 //////////////////////////////////////////////////////////////////////
1124 //
1125 // BiblioInfo
1126 //
1127 //////////////////////////////////////////////////////////////////////
1128
1129 namespace {
1130
1131 // A functor for use with sort, leading to case insensitive sorting
1132 class compareNoCase: public binary_function<docstring, docstring, bool>
1133 {
1134 public:
1135         bool operator()(docstring const & s1, docstring const & s2) const {
1136                 return compare_no_case(s1, s2) < 0;
1137         }
1138 };
1139
1140 } // namespace anon
1141
1142
1143 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1144 {
1145         vector<docstring> result;
1146         if (!data.isBibTeX())
1147                 return result;
1148         // Legacy crossref field. This is not nestable.
1149         if (!nested && !data["crossref"].empty()) {
1150                 docstring const xrefkey = data["crossref"];
1151                 result.push_back(xrefkey);
1152                 // However, check for nested xdatas
1153                 BiblioInfo::const_iterator it = find(xrefkey);
1154                 if (it != end()) {
1155                         BibTeXInfo const & xref = it->second;
1156                         vector<docstring> const nxdata = getXRefs(xref, true);
1157                         if (!nxdata.empty())
1158                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
1159                 }
1160         }
1161         // Biblatex's xdata field. Infinitely nestable.
1162         // XData field can consist of a comma-separated list of keys
1163         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1164         if (!xdatakeys.empty()) {
1165                 vector<docstring>::const_iterator xit = xdatakeys.begin();
1166                 vector<docstring>::const_iterator xen = xdatakeys.end();
1167                 for (; xit != xen; ++xit) {
1168                         docstring const xdatakey = *xit;
1169                         result.push_back(xdatakey);
1170                         BiblioInfo::const_iterator it = find(xdatakey);
1171                         if (it != end()) {
1172                                 BibTeXInfo const & xdata = it->second;
1173                                 vector<docstring> const nxdata = getXRefs(xdata, true);
1174                                 if (!nxdata.empty())
1175                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
1176                         }
1177                 }
1178         }
1179         return result;
1180 }
1181
1182
1183 vector<docstring> const BiblioInfo::getKeys() const
1184 {
1185         vector<docstring> bibkeys;
1186         BiblioInfo::const_iterator it  = begin();
1187         for (; it != end(); ++it)
1188                 bibkeys.push_back(it->first);
1189         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1190         return bibkeys;
1191 }
1192
1193
1194 vector<docstring> const BiblioInfo::getFields() const
1195 {
1196         vector<docstring> bibfields;
1197         set<docstring>::const_iterator it = field_names_.begin();
1198         set<docstring>::const_iterator end = field_names_.end();
1199         for (; it != end; ++it)
1200                 bibfields.push_back(*it);
1201         sort(bibfields.begin(), bibfields.end());
1202         return bibfields;
1203 }
1204
1205
1206 vector<docstring> const BiblioInfo::getEntries() const
1207 {
1208         vector<docstring> bibentries;
1209         set<docstring>::const_iterator it = entry_types_.begin();
1210         set<docstring>::const_iterator end = entry_types_.end();
1211         for (; it != end; ++it)
1212                 bibentries.push_back(*it);
1213         sort(bibentries.begin(), bibentries.end());
1214         return bibentries;
1215 }
1216
1217
1218 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1219 {
1220         BiblioInfo::const_iterator it = find(key);
1221         if (it == end())
1222                 return docstring();
1223         BibTeXInfo const & data = it->second;
1224         return data.getAuthorOrEditorList(&buf, false);
1225 }
1226
1227
1228 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1229 {
1230         BiblioInfo::const_iterator it = find(key);
1231         if (it == end())
1232                 return docstring();
1233         BibTeXInfo const & data = it->second;
1234         return data.citeNumber();
1235 }
1236
1237
1238 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1239 {
1240         BiblioInfo::const_iterator it = find(key);
1241         if (it == end())
1242                 return docstring();
1243         BibTeXInfo const & data = it->second;
1244         docstring year = data.getYear();
1245         if (year.empty()) {
1246                 // let's try the crossrefs
1247                 vector<docstring> const xrefs = getXRefs(data);
1248                 if (xrefs.empty())
1249                         // no luck
1250                         return docstring();
1251                 vector<docstring>::const_iterator it = xrefs.begin();
1252                 vector<docstring>::const_iterator en = xrefs.end();
1253                 for (; it != en; ++it) {
1254                         BiblioInfo::const_iterator const xrefit = find(*it);
1255                         if (xrefit == end())
1256                                 continue;
1257                         BibTeXInfo const & xref_data = xrefit->second;
1258                         year = xref_data.getYear();
1259                         if (!year.empty())
1260                                 // success!
1261                                 break;
1262                 }
1263         }
1264         if (use_modifier && data.modifier() != 0)
1265                 year += data.modifier();
1266         return year;
1267 }
1268
1269
1270 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1271 {
1272         docstring const year = getYear(key, use_modifier);
1273         if (year.empty())
1274                 return buf.B_("No year");
1275         return year;
1276 }
1277
1278
1279 docstring const BiblioInfo::getInfo(docstring const & key,
1280         Buffer const & buf, CiteItem const & ci) const
1281 {
1282         BiblioInfo::const_iterator it = find(key);
1283         if (it == end())
1284                 return docstring(_("Bibliography entry not found!"));
1285         BibTeXInfo const & data = it->second;
1286         BibTeXInfoList xrefptrs;
1287         vector<docstring> const xrefs = getXRefs(data);
1288         if (!xrefs.empty()) {
1289                 vector<docstring>::const_iterator it = xrefs.begin();
1290                 vector<docstring>::const_iterator en = xrefs.end();
1291                 for (; it != en; ++it) {
1292                         BiblioInfo::const_iterator const xrefit = find(*it);
1293                         if (xrefit != end())
1294                                 xrefptrs.push_back(&(xrefit->second));
1295                 }
1296         }
1297         return data.getInfo(xrefptrs, buf, ci);
1298 }
1299
1300
1301 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1302         Buffer const & buf, string const & style, CiteItem const & ci) const
1303 {
1304         size_t max_size = ci.max_size;
1305         // shorter makes no sense
1306         LASSERT(max_size >= 16, max_size = 16);
1307
1308         // we can't display more than 10 of these, anyway
1309         bool const too_many_keys = keys.size() > 10;
1310         if (too_many_keys)
1311                 keys.resize(10);
1312
1313         CiteEngineType const engine_type = buf.params().citeEngineType();
1314         DocumentClass const & dc = buf.params().documentClass();
1315         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1316         docstring ret = format;
1317         vector<docstring>::const_iterator key = keys.begin();
1318         vector<docstring>::const_iterator ken = keys.end();
1319         for (int i = 0; key != ken; ++key, ++i) {
1320                 BiblioInfo::const_iterator it = find(*key);
1321                 BibTeXInfo empty_data;
1322                 empty_data.key(*key);
1323                 BibTeXInfo & data = empty_data;
1324                 vector<BibTeXInfo const *> xrefptrs;
1325                 if (it != end()) {
1326                         data = it->second;
1327                         vector<docstring> const xrefs = getXRefs(data);
1328                         if (!xrefs.empty()) {
1329                                 vector<docstring>::const_iterator it = xrefs.begin();
1330                                 vector<docstring>::const_iterator en = xrefs.end();
1331                                 for (; it != en; ++it) {
1332                                         BiblioInfo::const_iterator const xrefit = find(*it);
1333                                         if (xrefit != end())
1334                                                 xrefptrs.push_back(&(xrefit->second));
1335                                 }
1336                         }
1337                 }
1338                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1339         }
1340
1341         if (too_many_keys)
1342                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1343         support::truncateWithEllipsis(ret, max_size);
1344         return ret;
1345 }
1346
1347
1348 bool BiblioInfo::isBibtex(docstring const & key) const
1349 {
1350         docstring key1;
1351         split(key, key1, ',');
1352         BiblioInfo::const_iterator it = find(key1);
1353         if (it == end())
1354                 return false;
1355         return it->second.isBibTeX();
1356 }
1357
1358
1359 vector<docstring> const BiblioInfo::getCiteStrings(
1360         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1361         Buffer const & buf, CiteItem const & ci) const
1362 {
1363         if (empty())
1364                 return vector<docstring>();
1365
1366         string style;
1367         vector<docstring> vec(styles.size());
1368         for (size_t i = 0; i != vec.size(); ++i) {
1369                 style = styles[i].name;
1370                 vec[i] = getLabel(keys, buf, style, ci);
1371         }
1372
1373         return vec;
1374 }
1375
1376
1377 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1378 {
1379         bimap_.insert(info.begin(), info.end());
1380         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1381         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1382 }
1383
1384
1385 namespace {
1386
1387 // used in xhtml to sort a list of BibTeXInfo objects
1388 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1389 {
1390         docstring const lauth = lhs->getAuthorOrEditorList();
1391         docstring const rauth = rhs->getAuthorOrEditorList();
1392         docstring const lyear = lhs->getYear();
1393         docstring const ryear = rhs->getYear();
1394         docstring const ltitl = lhs->operator[]("title");
1395         docstring const rtitl = rhs->operator[]("title");
1396         return  (lauth < rauth)
1397                 || (lauth == rauth && lyear < ryear)
1398                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1399 }
1400
1401 }
1402
1403
1404 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1405 {
1406         cited_entries_.clear();
1407         // We are going to collect all the citation keys used in the document,
1408         // getting them from the TOC.
1409         // FIXME We may want to collect these differently, in the first case,
1410         // so that we might have them in order of appearance.
1411         set<docstring> citekeys;
1412         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1413         Toc::const_iterator it = toc->begin();
1414         Toc::const_iterator const en = toc->end();
1415         for (; it != en; ++it) {
1416                 if (it->str().empty())
1417                         continue;
1418                 vector<docstring> const keys = getVectorFromString(it->str());
1419                 citekeys.insert(keys.begin(), keys.end());
1420         }
1421         if (citekeys.empty())
1422                 return;
1423
1424         // We have a set of the keys used in this document.
1425         // We will now convert it to a list of the BibTeXInfo objects used in
1426         // this document...
1427         vector<BibTeXInfo const *> bi;
1428         set<docstring>::const_iterator cit = citekeys.begin();
1429         set<docstring>::const_iterator const cen = citekeys.end();
1430         for (; cit != cen; ++cit) {
1431                 BiblioInfo::const_iterator const bt = find(*cit);
1432                 if (bt == end() || !bt->second.isBibTeX())
1433                         continue;
1434                 bi.push_back(&(bt->second));
1435         }
1436         // ...and sort it.
1437         sort(bi.begin(), bi.end(), lSorter);
1438
1439         // Now we can write the sorted keys
1440         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1441         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1442         for (; bit != ben; ++bit)
1443                 cited_entries_.push_back((*bit)->key());
1444 }
1445
1446
1447 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1448 {
1449         collectCitedEntries(buf);
1450         CiteEngineType const engine_type = buf.params().citeEngineType();
1451         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1452
1453         int keynumber = 0;
1454         char modifier = 0;
1455         // used to remember the last one we saw
1456         // we'll be comparing entries to see if we need to add
1457         // modifiers, like "1984a"
1458         map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1459
1460         vector<docstring>::const_iterator it = cited_entries_.begin();
1461         vector<docstring>::const_iterator const en = cited_entries_.end();
1462         for (; it != en; ++it) {
1463                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1464                 // this shouldn't happen, but...
1465                 if (biit == bimap_.end())
1466                         // ...fail gracefully, anyway.
1467                         continue;
1468                 BibTeXInfo & entry = biit->second;
1469                 if (numbers) {
1470                         docstring const num = convert<docstring>(++keynumber);
1471                         entry.setCiteNumber(num);
1472                 } else {
1473                         // The first test here is checking whether this is the first
1474                         // time through the loop. If so, then we do not have anything
1475                         // with which to compare.
1476                         if (last != bimap_.end()
1477                             && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1478                             // we access the year via getYear() so as to get it from the xref,
1479                             // if we need to do so
1480                             && getYear(entry.key()) == getYear(last->second.key())) {
1481                                 if (modifier == 0) {
1482                                         // so the last one should have been 'a'
1483                                         last->second.setModifier('a');
1484                                         modifier = 'b';
1485                                 } else if (modifier == 'z')
1486                                         modifier = 'A';
1487                                 else
1488                                         modifier++;
1489                         } else {
1490                                 modifier = 0;
1491                         }
1492                         entry.setModifier(modifier);
1493                         // remember the last one
1494                         last = biit;
1495                 }
1496         }
1497         // Set the labels
1498         it = cited_entries_.begin();
1499         for (; it != en; ++it) {
1500                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1501                 // this shouldn't happen, but...
1502                 if (biit == bimap_.end())
1503                         // ...fail gracefully, anyway.
1504                         continue;
1505                 BibTeXInfo & entry = biit->second;
1506                 if (numbers) {
1507                         entry.label(entry.citeNumber());
1508                 } else {
1509                         docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1510                         // we do it this way so as to access the xref, if necessary
1511                         // note that this also gives us the modifier
1512                         docstring const year = getYear(*it, buf, true);
1513                         if (!auth.empty() && !year.empty())
1514                                 entry.label(auth + ' ' + year);
1515                         else
1516                                 entry.label(entry.key());
1517                 }
1518         }
1519 }
1520
1521
1522 //////////////////////////////////////////////////////////////////////
1523 //
1524 // CitationStyle
1525 //
1526 //////////////////////////////////////////////////////////////////////
1527
1528
1529 CitationStyle citationStyleFromString(string const & command,
1530                                       BufferParams const & params)
1531 {
1532         CitationStyle cs;
1533         if (command.empty())
1534                 return cs;
1535
1536         string const alias = params.getCiteAlias(command);
1537         string cmd = alias.empty() ? command : alias;
1538         if (isUpperCase(command[0])) {
1539                 cs.forceUpperCase = true;
1540                 cmd[0] = lowercase(cmd[0]);
1541         }
1542
1543         size_t const n = command.size() - 1;
1544         if (command[n] == '*') {
1545                 cs.hasStarredVersion = true;
1546                 if (suffixIs(cmd, '*'))
1547                         cmd = cmd.substr(0, cmd.size() - 1);
1548         }
1549
1550         cs.name = cmd;
1551         return cs;
1552 }
1553
1554
1555 string citationStyleToString(const CitationStyle & cs, bool const latex)
1556 {
1557         string cmd = latex ? cs.cmd : cs.name;
1558         if (cs.forceUpperCase)
1559                 cmd[0] = uppercase(cmd[0]);
1560         if (cs.hasStarredVersion)
1561                 cmd += '*';
1562         return cmd;
1563 }
1564
1565 } // namespace lyx