src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  * \author Jürgen Spitzmüller
  11  *
  12  * Full author contact details are available in file CREDITS.
  13  */
  14
  15 #include <config.h>
  16
  17 #include "BiblioInfo.h"
  18 #include "Buffer.h"
  19 #include "BufferParams.h"
  20 #include "buffer_funcs.h"
  21 #include "Citation.h"
  22 #include "Encoding.h"
  23 #include "InsetIterator.h"
  24 #include "Language.h"
  25 #include "output_xhtml.h"
  26 #include "Paragraph.h"
  27 #include "TextClass.h"
  28 #include "TocBackend.h"
  29
  30 #include "support/convert.h"
  31 #include "support/debug.h"
  32 #include "support/docstream.h"
  33 #include "support/gettext.h"
  34 #include "support/lassert.h"
  35 #include "support/lstrings.h"
  36 #include "support/regex.h"
  37 #include "support/textutils.h"
  38
  39 #include <map>
  40 #include <set>
  41
  42 using namespace std;
  43 using namespace lyx::support;
  44
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 // Remove placeholders from names
  51 docstring renormalize(docstring const & input)
  52 {
  53         docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
  54         return subst(res, from_ascii("$$comma!"), from_ascii(","));
  55 }
  56
  57
  58 // Split the surname into prefix ("von-part") and family name
  59 pair<docstring, docstring> parseSurname(docstring const & sname)
  60 {
  61         // Split the surname into its tokens
  62         vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
  63         if (pieces.size() < 2)
  64                 return make_pair(docstring(), sname);
  65
  66         // Now we look for pieces that begin with a lower case letter.
  67         // All except for the very last token constitute the "von-part".
  68         docstring prefix;
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator const en = pieces.end();
  71         bool first = true;
  72         for (; it != en; ++it) {
  73                 if ((*it).empty())
  74                         continue;
  75                 // If this is the last piece, then what we now have is
  76                 // the family name, notwithstanding the casing.
  77                 if (it + 1 == en)
  78                         break;
  79                 char_type const c = (*it)[0];
  80                 // If the piece starts with a upper case char, we assume
  81                 // this is part of the surname.
  82                 if (!isLower(c))
  83                         break;
  84                 // Nothing of the former, so add this piece to the prename
  85                 if (!first)
  86                         prefix += " ";
  87                 else
  88                         first = false;
  89                 prefix += *it;
  90         }
  91
  92         // Reconstruct the family name.
  93         // Note that if we left the loop with because it + 1 == en,
  94         // then this will still do the right thing, i.e., make surname
  95         // just be the last piece.
  96         docstring surname;
  97         first = true;
  98         for (; it != en; ++it) {
  99                 if (!first)
 100                         surname += " ";
 101                 else
 102                         first = false;
 103                 surname += *it;
 104         }
 105         return make_pair(prefix, surname);
 106 }
 107
 108
 109 struct name_parts {
 110         docstring surname;
 111         docstring prename;
 112         docstring suffix;
 113         docstring prefix;
 114 };
 115
 116
 117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
 118 name_parts nameParts(docstring const & iname)
 119 {
 120         name_parts res;
 121         if (iname.empty())
 122                 return res;
 123
 124         // First we check for goupings (via {...}) and replace blanks and
 125         // commas inside groups with temporary placeholders
 126         docstring name;
 127         int gl = 0;
 128         docstring::const_iterator p = iname.begin();
 129         while (p != iname.end()) {
 130                 // count grouping level
 131                 if (*p == '{')
 132                         ++gl;
 133                 else if (*p == '}')
 134                         --gl;
 135                 // generate string with probable placeholders
 136                 if (*p == ' ' && gl > 0)
 137                         name += from_ascii("$$space!");
 138                 else if (*p == ',' && gl > 0)
 139                         name += from_ascii("$$comma!");
 140                 else
 141                         name += *p;
 142                 ++p;
 143         }
 144
 145         // Now we look for a comma, and take the last name to be everything
 146         // preceding the right-most one, so that we also get the name suffix
 147         // (aka "jr" part).
 148         vector<docstring> pieces = getVectorFromString(name);
 149         if (pieces.size() > 1) {
 150                 // Whether we have a name suffix or not, the prename is
 151                 // always last item
 152                 res.prename = renormalize(pieces.back());
 153                 // The family name, conversely, is always the first item.
 154                 // However, it might contain a prefix (aka "von" part)
 155                 docstring const sname = pieces.front();
 156                 res.prefix = renormalize(parseSurname(sname).first);
 157                 res.surname = renormalize(parseSurname(sname).second);
 158                 // If we have three pieces (the maximum allowed by BibTeX),
 159                 // the second one is the name suffix.
 160                 if (pieces.size() > 2)
 161                         res.suffix = renormalize(pieces.at(1));
 162                 return res;
 163         }
 164
 165         // OK, so now we want to look for the last name.
 166         // Split on spaces, to get various tokens.
 167         pieces = getVectorFromString(name, from_ascii(" "));
 168         // No space: Only a family name given
 169         if (pieces.size() < 2) {
 170                 res.surname = renormalize(pieces.back());
 171                 return res;
 172         }
 173         // If we get two pieces, assume "prename surname"
 174         if (pieces.size() == 2) {
 175                 res.prename = renormalize(pieces.front());
 176                 res.surname = renormalize(pieces.back());
 177                 return res;
 178         }
 179
 180         // More than 3 pieces: A name prefix (aka "von" part) might be included.
 181         // We look for the first piece that begins with a lower case letter
 182         // (which is the name prefix, if it is not the last token) or the last token.
 183         docstring prename;
 184         vector<docstring>::const_iterator it = pieces.begin();
 185         vector<docstring>::const_iterator const en = pieces.end();
 186         bool first = true;
 187         for (; it != en; ++it) {
 188                 if ((*it).empty())
 189                         continue;
 190                 char_type const c = (*it)[0];
 191                 // If the piece starts with a lower case char, we assume
 192                 // this is the name prefix and thus prename is complete.
 193                 if (isLower(c))
 194                         break;
 195                 // Same if this is the last piece, which is always the surname.
 196                 if (it + 1 == en)
 197                         break;
 198                 // Nothing of the former, so add this piece to the prename
 199                 if (!first)
 200                         prename += " ";
 201                 else
 202                         first = false;
 203                 prename += *it;
 204         }
 205
 206         // Now reconstruct the family name and strip the prefix.
 207         // Note that if we left the loop because it + 1 == en,
 208         // then this will still do the right thing, i.e., make surname
 209         // just be the last piece.
 210         docstring surname;
 211         first = true;
 212         for (; it != en; ++it) {
 213                 if (!first)
 214                         surname += " ";
 215                 else
 216                         first = false;
 217                 surname += *it;
 218         }
 219         res.prename = renormalize(prename);
 220         res.prefix = renormalize(parseSurname(surname).first);
 221         res.surname = renormalize(parseSurname(surname).second);
 222         return res;
 223 }
 224
 225
 226 docstring constructName(docstring const & name, string const scheme)
 227 {
 228         // re-constructs a name from name parts according
 229         // to a given scheme
 230         docstring const prename = nameParts(name).prename;
 231         docstring const surname = nameParts(name).surname;
 232         docstring const prefix = nameParts(name).prefix;
 233         docstring const suffix = nameParts(name).suffix;
 234         string res = scheme;
 235         static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 236         static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 237         static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 238         smatch sub;
 239         if (regex_match(scheme, sub, reg1)) {
 240                 res = sub.str(1);
 241                 if (!prename.empty())
 242                         res += sub.str(3);
 243                 res += sub.str(5);
 244         }
 245         if (regex_match(res, sub, reg2)) {
 246                 res = sub.str(1);
 247                 if (!suffix.empty())
 248                         res += sub.str(3);
 249                 res += sub.str(5);
 250         }
 251         if (regex_match(res, sub, reg3)) {
 252                 res = sub.str(1);
 253                 if (!prefix.empty())
 254                         res += sub.str(3);
 255                 res += sub.str(5);
 256         }
 257         docstring result = from_ascii(res);
 258         result = subst(result, from_ascii("%prename%"), prename);
 259         result = subst(result, from_ascii("%surname%"), surname);
 260         result = subst(result, from_ascii("%prefix%"), prefix);
 261         result = subst(result, from_ascii("%suffix%"), suffix);
 262         return result;
 263 }
 264
 265
 266 vector<docstring> const getAuthors(docstring const & author)
 267 {
 268         // We check for goupings (via {...}) and only consider " and "
 269         // outside groups as author separator. This is to account
 270         // for cases such as {{Barnes and Noble, Inc.}}, which
 271         // need to be treated as one single family name.
 272         // We use temporary placeholders in order to differentiate the
 273         // diverse " and " cases.
 274
 275         // First, we temporarily replace all ampersands. It is rather unusual
 276         // in author names, but can happen (consider cases such as "C \& A Corp.").
 277         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
 278         // Then, we temporarily make all " and " strings to ampersands in order
 279         // to handle them later on a per-char level.
 280         iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
 281         // Now we traverse through the string and replace the "&" by the proper
 282         // output in- and outside groups
 283         docstring name;
 284         int gl = 0;
 285         docstring::const_iterator p = iname.begin();
 286         while (p != iname.end()) {
 287                 // count grouping level
 288                 if (*p == '{')
 289                         ++gl;
 290                 else if (*p == '}')
 291                         --gl;
 292                 // generate string with probable placeholders
 293                 if (*p == '&') {
 294                         if (gl > 0)
 295                                 // Inside groups, we output "and"
 296                                 name += from_ascii("and");
 297                         else
 298                                 // Outside groups, we output a separator
 299                                 name += from_ascii("$$namesep!");
 300                 }
 301                 else
 302                         name += *p;
 303                 ++p;
 304         }
 305
 306         // re-insert the literal ampersands
 307         name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
 308
 309         // Now construct the actual vector
 310         return getVectorFromString(name, from_ascii(" $$namesep! "));
 311 }
 312
 313
 314 bool multipleAuthors(docstring const author)
 315 {
 316         return getAuthors(author).size() > 1;
 317 }
 318
 319
 320 // converts a string containing LaTeX commands into unicode
 321 // for display.
 322 docstring convertLaTeXCommands(docstring const & str)
 323 {
 324         docstring val = str;
 325         docstring ret;
 326
 327         bool scanning_cmd = false;
 328         bool scanning_math = false;
 329         bool escaped = false; // used to catch \$, etc.
 330         while (!val.empty()) {
 331                 char_type const ch = val[0];
 332
 333                 // if we're scanning math, we output everything until we
 334                 // find an unescaped $, at which point we break out.
 335                 if (scanning_math) {
 336                         if (escaped)
 337                                 escaped = false;
 338                         else if (ch == '\\')
 339                                 escaped = true;
 340                         else if (ch == '$')
 341                                 scanning_math = false;
 342                         ret += ch;
 343                         val = val.substr(1);
 344                         continue;
 345                 }
 346
 347                 // if we're scanning a command name, then we just
 348                 // discard characters until we hit something that
 349                 // isn't alpha.
 350                 if (scanning_cmd) {
 351                         if (isAlphaASCII(ch)) {
 352                                 val = val.substr(1);
 353                                 escaped = false;
 354                                 continue;
 355                         }
 356                         // so we're done with this command.
 357                         // now we fall through and check this character.
 358                         scanning_cmd = false;
 359                 }
 360
 361                 // was the last character a \? If so, then this is something like:
 362                 // \\ or \$, so we'll just output it. That's probably not always right...
 363                 if (escaped) {
 364                         // exception: output \, as THIN SPACE
 365                         if (ch == ',')
 366                                 ret.push_back(0x2009);
 367                         else
 368                                 ret += ch;
 369                         val = val.substr(1);
 370                         escaped = false;
 371                         continue;
 372                 }
 373
 374                 if (ch == '$') {
 375                         ret += ch;
 376                         val = val.substr(1);
 377                         scanning_math = true;
 378                         continue;
 379                 }
 380
 381                 // Change text mode accents in the form
 382                 // {\v a} to \v{a} (see #9340).
 383                 // FIXME: This is a sort of mini-tex2lyx.
 384                 //        Use the real tex2lyx instead!
 385                 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
 386                 if (lyx::regex_search(to_utf8(val), tma_reg)) {
 387                         val = val.substr(1);
 388                         val.replace(2, 1, from_ascii("{"));
 389                         continue;
 390                 }
 391
 392                 // Apart from the above, we just ignore braces
 393                 if (ch == '{' || ch == '}') {
 394                         val = val.substr(1);
 395                         continue;
 396                 }
 397
 398                 // we're going to check things that look like commands, so if
 399                 // this doesn't, just output it.
 400                 if (ch != '\\') {
 401                         ret += ch;
 402                         val = val.substr(1);
 403                         continue;
 404                 }
 405
 406                 // ok, could be a command of some sort
 407                 // let's see if it corresponds to some unicode
 408                 // unicodesymbols has things in the form: \"{u},
 409                 // whereas we may see things like: \"u. So we'll
 410                 // look for that and change it, if necessary.
 411                 // FIXME: This is a sort of mini-tex2lyx.
 412                 //        Use the real tex2lyx instead!
 413                 static lyx::regex const reg("^\\\\\\W\\w");
 414                 if (lyx::regex_search(to_utf8(val), reg)) {
 415                         val.insert(3, from_ascii("}"));
 416                         val.insert(2, from_ascii("{"));
 417                 }
 418                 bool termination;
 419                 docstring rem;
 420                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 421                                 Encodings::TEXT_CMD, termination, rem);
 422                 if (!cnvtd.empty()) {
 423                         // it did, so we'll take that bit and proceed with what's left
 424                         ret += cnvtd;
 425                         val = rem;
 426                         continue;
 427                 }
 428                 // it's a command of some sort
 429                 scanning_cmd = true;
 430                 escaped = true;
 431                 val = val.substr(1);
 432         }
 433         return ret;
 434 }
 435
 436
 437 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 438 docstring processRichtext(docstring const & str, bool richtext)
 439 {
 440         docstring val = str;
 441         docstring ret;
 442
 443         bool scanning_rich = false;
 444         while (!val.empty()) {
 445                 char_type const ch = val[0];
 446                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 447                         // beginning of rich text
 448                         scanning_rich = true;
 449                         val = val.substr(2);
 450                         continue;
 451                 }
 452                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 453                         // end of rich text
 454                         scanning_rich = false;
 455                         val = val.substr(2);
 456                         continue;
 457                 }
 458                 if (richtext) {
 459                         if (scanning_rich)
 460                                 ret += ch;
 461                         else {
 462                                 // we need to escape '<' and '>'
 463                                 if (ch == '<')
 464                                         ret += "&lt;";
 465                                 else if (ch == '>')
 466                                         ret += "&gt;";
 467                                 else
 468                                         ret += ch;
 469                         }
 470                 } else if (!scanning_rich /* && !richtext */)
 471                         ret += ch;
 472                 // else the character is discarded, which will happen only if
 473                 // richtext == false and we are scanning rich text
 474                 val = val.substr(1);
 475         }
 476         return ret;
 477 }
 478
 479 } // anon namespace
 480
 481
 482 //////////////////////////////////////////////////////////////////////
 483 //
 484 // BibTeXInfo
 485 //
 486 //////////////////////////////////////////////////////////////////////
 487
 488 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 489         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 490           modifier_(0)
 491 {}
 492
 493
 494
 495 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
 496                                           bool full, bool forceshort) const
 497 {
 498         docstring author = operator[]("author");
 499         if (author.empty())
 500                 author = operator[]("editor");
 501
 502         return getAuthorList(buf, author, full, forceshort);
 503 }
 504
 505
 506 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 507                 docstring const & author, bool const full, bool const forceshort,
 508                 bool const allnames, bool const beginning) const
 509 {
 510         // Maxnames treshold depend on engine
 511         size_t maxnames = buf ?
 512                 buf->params().documentClass().max_citenames() : 2;
 513
 514         if (!is_bibtex_) {
 515                 docstring const opt = label();
 516                 if (opt.empty())
 517                         return docstring();
 518
 519                 docstring authors;
 520                 docstring const remainder = trim(split(opt, authors, '('));
 521                 if (remainder.empty())
 522                         // in this case, we didn't find a "(",
 523                         // so we don't have author (year)
 524                         return docstring();
 525                 return authors;
 526         }
 527
 528         if (author.empty())
 529                 return author;
 530
 531         // OK, we've got some names. Let's format them.
 532         // Try to split the author list
 533         vector<docstring> const authors = getAuthors(author);
 534
 535         docstring retval;
 536
 537         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 538                                                : ENGINE_TYPE_DEFAULT;
 539
 540         // These are defined in the styles
 541         string const etal =
 542                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
 543                     : " et al.";
 544         string const namesep =
 545                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
 546                    : ", ";
 547         string const lastnamesep =
 548                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
 549                     : ", and ";
 550         string const pairnamesep =
 551                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
 552                      : " and ";
 553         string firstnameform =
 554                         buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
 555                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 556         if (!beginning)
 557                 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
 558                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 559         string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
 560                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 561         if (!beginning)
 562                 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
 563                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 564         string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
 565                              : "{%prefix%[[%prefix% ]]}%surname%";
 566
 567         // Shorten the list (with et al.) if forceshort is set
 568         // and the list can actually be shortened, else if maxcitenames
 569         // is passed and full is not set.
 570         bool shorten = forceshort && authors.size() > 1;
 571         vector<docstring>::const_iterator it = authors.begin();
 572         vector<docstring>::const_iterator en = authors.end();
 573         for (size_t i = 0; it != en; ++it, ++i) {
 574                 if (i >= maxnames && !full) {
 575                         shorten = true;
 576                         break;
 577                 }
 578                 if (*it == "others") {
 579                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 580                         break;
 581                 }
 582                 if (i > 0 && i == authors.size() - 1) {
 583                         if (authors.size() == 2)
 584                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 585                         else
 586                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 587                 } else if (i > 0)
 588                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 589                 if (allnames)
 590                         retval += (i == 0) ? constructName(*it, firstnameform)
 591                                 : constructName(*it, othernameform);
 592                 else
 593                         retval += constructName(*it, citenameform);
 594         }
 595         if (shorten) {
 596                 if (allnames)
 597                         retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 598                 else
 599                         retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 600         }
 601
 602         return convertLaTeXCommands(retval);
 603 }
 604
 605
 606 docstring const BibTeXInfo::getYear() const
 607 {
 608         if (is_bibtex_) {
 609                 // first try legacy year field
 610                 docstring year = operator[]("year");
 611                 if (!year.empty())
 612                         return year;
 613                 // now try biblatex's date field
 614                 year = operator[]("date");
 615                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 616                 // We only want the years.
 617                 static regex const yreg("[-]?([\\d]{4}).*");
 618                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 619                 smatch sm;
 620                 string const date = to_utf8(year);
 621                 if (!regex_match(date, sm, yreg))
 622                         // cannot parse year.
 623                         return docstring();
 624                 year = from_ascii(sm[1]);
 625                 // check for an endyear
 626                 if (regex_match(date, sm, ereg))
 627                         year += char_type(0x2013) + from_ascii(sm[1]);
 628                 return year;
 629         }
 630
 631         docstring const opt = label();
 632         if (opt.empty())
 633                 return docstring();
 634
 635         docstring authors;
 636         docstring tmp = split(opt, authors, '(');
 637         if (tmp.empty())
 638                 // we don't have author (year)
 639                 return docstring();
 640         docstring year;
 641         tmp = split(tmp, year, ')');
 642         return year;
 643 }
 644
 645
 646 namespace {
 647
 648 docstring parseOptions(docstring const & format, string & optkey,
 649                     docstring & ifpart, docstring & elsepart);
 650
 651 // Calls parseOptions to deal with an embedded option, such as:
 652 //   {%number%[[, no.~%number%]]}
 653 // which must appear at the start of format. ifelsepart gets the
 654 // whole of the option, and we return what's left after the option.
 655 // we return format if there is an error.
 656 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 657 {
 658         LASSERT(format[0] == '{' && format[1] == '%', return format);
 659         string optkey;
 660         docstring ifpart;
 661         docstring elsepart;
 662         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 663         if (format == rest) { // parse error
 664                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 665                 return format;
 666         }
 667         LASSERT(rest.size() <= format.size(),
 668                 { ifelsepart = docstring(); return format; });
 669         ifelsepart = format.substr(0, format.size() - rest.size());
 670         return rest;
 671 }
 672
 673
 674 // Gets a "clause" from a format string, where the clause is
 675 // delimited by '[[' and ']]'. Returns what is left after the
 676 // clause is removed, and returns format if there is an error.
 677 docstring getClause(docstring const & format, docstring & clause)
 678 {
 679         docstring fmt = format;
 680         // remove '[['
 681         fmt = fmt.substr(2);
 682         // we'll remove characters from the front of fmt as we
 683         // deal with them
 684         while (!fmt.empty()) {
 685                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 686                         // that's the end
 687                         fmt = fmt.substr(2);
 688                         break;
 689                 }
 690                 // check for an embedded option
 691                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 692                         docstring part;
 693                         docstring const rest = parseEmbeddedOption(fmt, part);
 694                         if (fmt == rest) {
 695                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 696                                 return format;
 697                         }
 698                         clause += part;
 699                         fmt = rest;
 700                 } else { // it's just a normal character
 701                                 clause += fmt[0];
 702                                 fmt = fmt.substr(1);
 703                 }
 704         }
 705         return fmt;
 706 }
 707
 708
 709 // parse an options string, which must appear at the start of the
 710 // format parameter. puts the parsed bits in optkey, ifpart, and
 711 // elsepart and returns what's left after the option is removed.
 712 // if there's an error, it returns format itself.
 713 docstring parseOptions(docstring const & format, string & optkey,
 714                     docstring & ifpart, docstring & elsepart)
 715 {
 716         LASSERT(format[0] == '{' && format[1] == '%', return format);
 717         // strip '{%'
 718         docstring fmt = format.substr(2);
 719         size_t pos = fmt.find('%'); // end of key
 720         if (pos == string::npos) {
 721                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 722                 return format;
 723         }
 724         optkey = to_utf8(fmt.substr(0, pos));
 725         fmt = fmt.substr(pos + 1);
 726         // [[format]] should be next
 727         if (fmt[0] != '[' || fmt[1] != '[') {
 728                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 729                 return format;
 730         }
 731
 732         docstring curfmt = fmt;
 733         fmt = getClause(curfmt, ifpart);
 734         if (fmt == curfmt) {
 735                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 736                 return format;
 737         }
 738
 739         if (fmt[0] == '}') // we're done, no else clause
 740                 return fmt.substr(1);
 741
 742         // else part should follow
 743         if (fmt[0] != '[' || fmt[1] != '[') {
 744                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 745                 return format;
 746         }
 747
 748         curfmt = fmt;
 749         fmt = getClause(curfmt, elsepart);
 750         // we should be done
 751         if (fmt == curfmt || fmt[0] != '}') {
 752                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 753                 return format;
 754         }
 755         return fmt.substr(1);
 756 }
 757
 758
 759 } // anon namespace
 760
 761 /* FIXME
 762 Bug #9131 revealed an oddity in how we are generating citation information
 763 when more than one key is given. We end up building a longer and longer format
 764 string as we go, which we then have to re-parse, over and over and over again,
 765 rather than generating the information for the individual keys and then putting
 766 all of that together. We do that to deal with the way separators work, from what
 767 I can tell, but it still feels like a hack. Fixing this would require quite a
 768 bit of work, however.
 769 */
// Expands the citation format string \p format for this entry and
// returns the result. The string is consumed from the front, one
// construct at a time:
//   %key%      is replaced by the value of key. Keys starting with '!'
//              are macros whose definition is pushed back onto the
//              input and re-scanned; keys starting with '_' are looked
//              up as macros and translated; all other keys go through
//              getValueForKey() and, outside of rich text, are wrapped
//              in {!<span class="bib-KEY">!} ... {!</span>!}.
//   {%key%[[if]][[else]]}
//              is a conditional, parsed by parseOptions(). The chosen
//              clause is expanded recursively, except for the "next"
//              case, where the if clause is emitted unexpanded.
//   {! ... !}  delimits rich text; the delimiters are copied through.
// Anything else is copied to the output verbatim.
// \p counter counts macro expansions across the call; once it exceeds
// max_passes the translated string "ERROR!" is returned. The same
// string is returned for malformed input (a '{' inside a key, an
// unparsable option, an unterminated key or rich text section).
docstring BibTeXInfo::expandFormat(docstring const & format,
                BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
                CiteItem const & ci, bool next, bool second) const
{
        // incorrect use of macros could put us in an infinite loop
        static int const max_passes = 5000;
        // the use of overly large keys can lead to performance problems, due
        // to eventual attempts to convert LaTeX macros to unicode. See bug
        // #8944. By default, the size is limited to 128 (in CiteItem), but
        // for specific purposes (such as XHTML export), it needs to be enlarged
        // This is perhaps not the best solution, but it will have to do for now.
        size_t const max_keysize = ci.max_key_size;
        odocstringstream ret; // return value
        string key;
        // true while we are between the two '%' of a %key%
        bool scanning_key = false;
        // true while we are between "{!" and "!}"
        bool scanning_rich = false;

        CiteEngineType const engine_type = buf.params().citeEngineType();
        docstring fmt = format;
        // we'll remove characters from the front of fmt as we
        // deal with them
        while (!fmt.empty()) {
                if (counter > max_passes) {
                        LYXERR0("Recursion limit reached while parsing `"
                                << format << "'.");
                        return _("ERROR!");
                }

                char_type thischar = fmt[0];
                if (thischar == '%') {
                        // beginning or end of key
                        if (scanning_key) {
                                // end of key
                                scanning_key = false;
                                // so we replace the key with its value, which may be empty
                                if (key[0] == '!') {
                                        // macro: substitute its definition for the closing
                                        // '%' and scan the definition in turn. This is the
                                        // only place where counter is incremented.
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        fmt = from_utf8(val) + fmt.substr(1);
                                        counter += 1;
                                        continue;
                                } else if (key[0] == '_') {
                                        // a translatable bit
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val), buf.params().language->code());
                                        ret << trans;
                                } else {
                                        // an ordinary key: emit its value, tagged with a
                                        // span unless we are already inside rich text
                                        docstring const val =
                                                getValueForKey(key, buf, ci, xrefs, max_keysize);
                                        if (!scanning_rich)
                                                ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
                                        ret << val;
                                        if (!scanning_rich)
                                                ret << from_ascii("{!</span>!}");
                                }
                        } else {
                                // beginning of key
                                key.clear();
                                scanning_key = true;
                        }
                }
                else if (thischar == '{') {
                        // beginning of option?
                        if (scanning_key) {
                                LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
                                return _("ERROR!");
                        }
                        if (fmt.size() > 1) {
                                if (fmt[1] == '%') {
                                        // it is the beginning of an optional format
                                        string optkey;
                                        docstring ifpart;
                                        docstring elsepart;
                                        docstring const newfmt =
                                                parseOptions(fmt, optkey, ifpart, elsepart);
                                        if (newfmt == fmt) // parse error
                                                return _("ERROR!");
                                        fmt = newfmt;
                                        docstring const val =
                                                getValueForKey(optkey, buf, ci, xrefs);
                                        // Note that the recursive expansions below start
                                        // with a fresh counter and do not pass on `second'.
                                        if (optkey == "next" && next)
                                                ret << ifpart; // without expansion
                                        else if (optkey == "second" && second) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!val.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!elsepart.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(elsepart, xrefs, newcounter, buf,
                                                        ci, next);
                                        }
                                        // fmt will have been shortened for us already
                                        continue;
                                }
                                if (fmt[1] == '!') {
                                        // beginning of rich text
                                        scanning_rich = true;
                                        fmt = fmt.substr(2);
                                        ret << from_ascii("{!");
                                        continue;
                                }
                        }
                        // we are here if '{' was not followed by % or !.
                        // So it's just a character.
                        ret << thischar;
                }
                else if (scanning_rich && thischar == '!'
                         && fmt.size() > 1 && fmt[1] == '}') {
                        // end of rich text
                        scanning_rich = false;
                        fmt = fmt.substr(2);
                        ret << from_ascii("!}");
                        continue;
                }
                else if (scanning_key)
                        // inside %...%: collect the key name
                        key += char(thischar);
                else {
                        // plain text: copy it, skipping characters that
                        // cannot be encoded
                        try {
                                ret.put(thischar);
                        } catch (EncodingException & /* e */) {
                                LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
                        }
                }
                // drop the character we have just dealt with
                fmt = fmt.substr(1);
        } // while loop
        if (scanning_key) {
                LYXERR0("Never found end of key in `" << format << "'!");
                return _("ERROR!");
        }
        if (scanning_rich) {
                LYXERR0("Never found end of rich text in `" << format << "'!");
                return _("ERROR!");
        }
        return ret.str();
}
 912
 913
 914 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 915         Buffer const & buf, CiteItem const & ci) const
 916 {
 917         bool const richtext = ci.richtext;
 918
 919         if (!richtext && !info_.empty())
 920                 return info_;
 921         if (richtext && !info_richtext_.empty())
 922                 return info_richtext_;
 923
 924         if (!is_bibtex_) {
 925                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 926                 info_ = it->second;
 927                 return info_;
 928         }
 929
 930         CiteEngineType const engine_type = buf.params().citeEngineType();
 931         DocumentClass const & dc = buf.params().documentClass();
 932         docstring const & format =
 933                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 934         int counter = 0;
 935         info_ = expandFormat(format, xrefs, counter, buf,
 936                 ci, false, false);
 937
 938         if (info_.empty()) {
 939                 // this probably shouldn't happen
 940                 return info_;
 941         }
 942
 943         if (richtext) {
 944                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 945                 return info_richtext_;
 946         }
 947
 948         info_ = convertLaTeXCommands(processRichtext(info_, false));
 949         return info_;
 950 }
 951
 952
 953 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 954         Buffer const & buf, docstring const & format,
 955         CiteItem const & ci, bool next, bool second) const
 956 {
 957         docstring loclabel;
 958
 959         int counter = 0;
 960         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 961
 962         if (!loclabel.empty() && !next) {
 963                 loclabel = processRichtext(loclabel, ci.richtext);
 964                 loclabel = convertLaTeXCommands(loclabel);
 965         }
 966
 967         return loclabel;
 968 }
 969
 970
 971 docstring const & BibTeXInfo::operator[](docstring const & field) const
 972 {
 973         BibTeXInfo::const_iterator it = find(field);
 974         if (it != end())
 975                 return it->second;
 976         static docstring const empty_value = docstring();
 977         return empty_value;
 978 }
 979
 980
 981 docstring const & BibTeXInfo::operator[](string const & field) const
 982 {
 983         return operator[](from_ascii(field));
 984 }
 985
 986
 987 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 988         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 989 {
 990         // anything less is pointless
 991         LASSERT(maxsize >= 16, maxsize = 16);
 992         string key = oldkey;
 993         bool cleanit = false;
 994         if (prefixIs(oldkey, "clean:")) {
 995                 key = oldkey.substr(6);
 996                 cleanit = true;
 997         }
 998
 999         docstring ret = operator[](key);
1000         if (ret.empty() && !xrefs.empty()) {
1001                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1002                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1003                 for (; it != en; ++it) {
1004                         if (*it && !(**it)[key].empty()) {
1005                                 ret = (**it)[key];
1006                                 break;
1007                         }
1008                 }
1009         }
1010         if (ret.empty()) {
1011                 // some special keys
1012                 // FIXME: dialog, textbefore and textafter have nothing to do with this
1013                 if (key == "dialog" && ci.context == CiteItem::Dialog)
1014                         ret = from_ascii("x"); // any non-empty string will do
1015                 else if (key == "export" && ci.context == CiteItem::Export)
1016                         ret = from_ascii("x"); // any non-empty string will do
1017                 else if (key == "ifstar" && ci.Starred)
1018                         ret = from_ascii("x"); // any non-empty string will do
1019                 else if (key == "ifqualified" && ci.isQualified)
1020                         ret = from_ascii("x"); // any non-empty string will do
1021                 else if (key == "entrytype")
1022                         ret = entry_type_;
1023                 else if (prefixIs(key, "ifentrytype:")
1024                          && from_ascii(key.substr(12)) == entry_type_)
1025                         ret = from_ascii("x"); // any non-empty string will do
1026                 else if (key == "key")
1027                         ret = bib_key_;
1028                 else if (key == "label")
1029                         ret = label_;
1030                 else if (key == "modifier" && modifier_ != 0)
1031                         ret = modifier_;
1032                 else if (key == "numericallabel")
1033                         ret = cite_number_;
1034                 else if (prefixIs(key, "ifmultiple:")) {
1035                         // Return whether we have multiple authors
1036                         docstring const kind = operator[](from_ascii(key.substr(11)));
1037                         if (multipleAuthors(kind))
1038                                 ret = from_ascii("x"); // any non-empty string will do
1039                 }
1040                 else if (prefixIs(key, "abbrvnames:")) {
1041                         // Special key to provide abbreviated name list,
1042                         // with respect to maxcitenames. Suitable for Bibliography
1043                         // beginnings.
1044                         docstring const kind = operator[](from_ascii(key.substr(11)));
1045                         ret = getAuthorList(&buf, kind, false, false, true);
1046                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1047                                 ret[0] = uppercase(ret[0]);
1048                 } else if (prefixIs(key, "fullnames:")) {
1049                         // Return a full name list. Suitable for Bibliography
1050                         // beginnings.
1051                         docstring const kind = operator[](from_ascii(key.substr(10)));
1052                         ret = getAuthorList(&buf, kind, true, false, true);
1053                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1054                                 ret[0] = uppercase(ret[0]);
1055                 } else if (prefixIs(key, "forceabbrvnames:")) {
1056                         // Special key to provide abbreviated name lists,
1057                         // irrespective of maxcitenames. Suitable for Bibliography
1058                         // beginnings.
1059                         docstring const kind = operator[](from_ascii(key.substr(15)));
1060                         ret = getAuthorList(&buf, kind, false, true, true);
1061                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1062                                 ret[0] = uppercase(ret[0]);
1063                 } else if (prefixIs(key, "abbrvbynames:")) {
1064                         // Special key to provide abbreviated name list,
1065                         // with respect to maxcitenames. Suitable for further names inside a
1066                         // bibliography item // (such as "ed. by ...")
1067                         docstring const kind = operator[](from_ascii(key.substr(11)));
1068                         ret = getAuthorList(&buf, kind, false, false, true, false);
1069                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1070                                 ret[0] = uppercase(ret[0]);
1071                 } else if (prefixIs(key, "fullbynames:")) {
1072                         // Return a full name list. Suitable for further names inside a
1073                         // bibliography item // (such as "ed. by ...")
1074                         docstring const kind = operator[](from_ascii(key.substr(10)));
1075                         ret = getAuthorList(&buf, kind, true, false, true, false);
1076                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1077                                 ret[0] = uppercase(ret[0]);
1078                 } else if (prefixIs(key, "forceabbrvbynames:")) {
1079                         // Special key to provide abbreviated name lists,
1080                         // irrespective of maxcitenames. Suitable for further names inside a
1081                         // bibliography item // (such as "ed. by ...")
1082                         docstring const kind = operator[](from_ascii(key.substr(15)));
1083                         ret = getAuthorList(&buf, kind, false, true, true, false);
1084                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1085                                 ret[0] = uppercase(ret[0]);
1086                 } else if (key == "abbrvciteauthor") {
1087                         // Special key to provide abbreviated author or
1088                         // editor names (suitable for citation labels),
1089                         // with respect to maxcitenames.
1090                         ret = getAuthorOrEditorList(&buf, false, false);
1091                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1092                                 ret[0] = uppercase(ret[0]);
1093                 } else if (key == "fullciteauthor") {
1094                         // Return a full author or editor list (for citation labels)
1095                         ret = getAuthorOrEditorList(&buf, true, false);
1096                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1097                                 ret[0] = uppercase(ret[0]);
1098                 } else if (key == "forceabbrvciteauthor") {
1099                         // Special key to provide abbreviated author or
1100                         // editor names (suitable for citation labels),
1101                         // irrespective of maxcitenames.
1102                         ret = getAuthorOrEditorList(&buf, false, true);
1103                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1104                                 ret[0] = uppercase(ret[0]);
1105                 } else if (key == "bibentry") {
1106                         // Special key to provide the full bibliography entry: see getInfo()
1107                         CiteEngineType const engine_type = buf.params().citeEngineType();
1108                         DocumentClass const & dc = buf.params().documentClass();
1109                         docstring const & format =
1110                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1111                         int counter = 0;
1112                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1113                 } else if (key == "textbefore")
1114                         ret = ci.textBefore;
1115                 else if (key == "textafter")
1116                         ret = ci.textAfter;
1117                 else if (key == "curpretext")
1118                         ret = ci.getPretexts()[bib_key_];
1119                 else if (key == "curposttext")
1120                         ret = ci.getPosttexts()[bib_key_];
1121                 else if (key == "year")
1122                         ret = getYear();
1123         }
1124
1125         if (cleanit)
1126                 ret = html::cleanAttr(ret);
1127
1128         // make sure it is not too big
1129         support::truncateWithEllipsis(ret, maxsize);
1130         return ret;
1131 }
1132
1133
1134 //////////////////////////////////////////////////////////////////////
1135 //
1136 // BiblioInfo
1137 //
1138 //////////////////////////////////////////////////////////////////////
1139
1140 namespace {
1141
1142 // A functor for use with sort, leading to case insensitive sorting
1143 class compareNoCase: public binary_function<docstring, docstring, bool>
1144 {
1145 public:
1146         bool operator()(docstring const & s1, docstring const & s2) const {
1147                 return compare_no_case(s1, s2) < 0;
1148         }
1149 };
1150
1151 } // namespace anon
1152
1153
1154 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1155 {
1156         vector<docstring> result;
1157         if (!data.isBibTeX())
1158                 return result;
1159         // Legacy crossref field. This is not nestable.
1160         if (!nested && !data["crossref"].empty()) {
1161                 docstring const xrefkey = data["crossref"];
1162                 result.push_back(xrefkey);
1163                 // However, check for nested xdatas
1164                 BiblioInfo::const_iterator it = find(xrefkey);
1165                 if (it != end()) {
1166                         BibTeXInfo const & xref = it->second;
1167                         vector<docstring> const nxdata = getXRefs(xref, true);
1168                         if (!nxdata.empty())
1169                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
1170                 }
1171         }
1172         // Biblatex's xdata field. Infinitely nestable.
1173         // XData field can consist of a comma-separated list of keys
1174         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1175         if (!xdatakeys.empty()) {
1176                 vector<docstring>::const_iterator xit = xdatakeys.begin();
1177                 vector<docstring>::const_iterator xen = xdatakeys.end();
1178                 for (; xit != xen; ++xit) {
1179                         docstring const xdatakey = *xit;
1180                         result.push_back(xdatakey);
1181                         BiblioInfo::const_iterator it = find(xdatakey);
1182                         if (it != end()) {
1183                                 BibTeXInfo const & xdata = it->second;
1184                                 vector<docstring> const nxdata = getXRefs(xdata, true);
1185                                 if (!nxdata.empty())
1186                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
1187                         }
1188                 }
1189         }
1190         return result;
1191 }
1192
1193
1194 vector<docstring> const BiblioInfo::getKeys() const
1195 {
1196         vector<docstring> bibkeys;
1197         BiblioInfo::const_iterator it  = begin();
1198         for (; it != end(); ++it)
1199                 bibkeys.push_back(it->first);
1200         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1201         return bibkeys;
1202 }
1203
1204
1205 vector<docstring> const BiblioInfo::getFields() const
1206 {
1207         vector<docstring> bibfields;
1208         set<docstring>::const_iterator it = field_names_.begin();
1209         set<docstring>::const_iterator end = field_names_.end();
1210         for (; it != end; ++it)
1211                 bibfields.push_back(*it);
1212         sort(bibfields.begin(), bibfields.end());
1213         return bibfields;
1214 }
1215
1216
1217 vector<docstring> const BiblioInfo::getEntries() const
1218 {
1219         vector<docstring> bibentries;
1220         set<docstring>::const_iterator it = entry_types_.begin();
1221         set<docstring>::const_iterator end = entry_types_.end();
1222         for (; it != end; ++it)
1223                 bibentries.push_back(*it);
1224         sort(bibentries.begin(), bibentries.end());
1225         return bibentries;
1226 }
1227
1228
1229 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1230 {
1231         BiblioInfo::const_iterator it = find(key);
1232         if (it == end())
1233                 return docstring();
1234         BibTeXInfo const & data = it->second;
1235         return data.getAuthorOrEditorList(&buf, false);
1236 }
1237
1238
1239 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1240 {
1241         BiblioInfo::const_iterator it = find(key);
1242         if (it == end())
1243                 return docstring();
1244         BibTeXInfo const & data = it->second;
1245         return data.citeNumber();
1246 }
1247
1248
1249 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1250 {
1251         BiblioInfo::const_iterator it = find(key);
1252         if (it == end())
1253                 return docstring();
1254         BibTeXInfo const & data = it->second;
1255         docstring year = data.getYear();
1256         if (year.empty()) {
1257                 // let's try the crossrefs
1258                 vector<docstring> const xrefs = getXRefs(data);
1259                 if (xrefs.empty())
1260                         // no luck
1261                         return docstring();
1262                 vector<docstring>::const_iterator it = xrefs.begin();
1263                 vector<docstring>::const_iterator en = xrefs.end();
1264                 for (; it != en; ++it) {
1265                         BiblioInfo::const_iterator const xrefit = find(*it);
1266                         if (xrefit == end())
1267                                 continue;
1268                         BibTeXInfo const & xref_data = xrefit->second;
1269                         year = xref_data.getYear();
1270                         if (!year.empty())
1271                                 // success!
1272                                 break;
1273                 }
1274         }
1275         if (use_modifier && data.modifier() != 0)
1276                 year += data.modifier();
1277         return year;
1278 }
1279
1280
1281 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1282 {
1283         docstring const year = getYear(key, use_modifier);
1284         if (year.empty())
1285                 return buf.B_("No year");
1286         return year;
1287 }
1288
1289
1290 docstring const BiblioInfo::getInfo(docstring const & key,
1291         Buffer const & buf, CiteItem const & ci) const
1292 {
1293         BiblioInfo::const_iterator it = find(key);
1294         if (it == end())
1295                 return docstring(_("Bibliography entry not found!"));
1296         BibTeXInfo const & data = it->second;
1297         BibTeXInfoList xrefptrs;
1298         vector<docstring> const xrefs = getXRefs(data);
1299         if (!xrefs.empty()) {
1300                 vector<docstring>::const_iterator it = xrefs.begin();
1301                 vector<docstring>::const_iterator en = xrefs.end();
1302                 for (; it != en; ++it) {
1303                         BiblioInfo::const_iterator const xrefit = find(*it);
1304                         if (xrefit != end())
1305                                 xrefptrs.push_back(&(xrefit->second));
1306                 }
1307         }
1308         return data.getInfo(xrefptrs, buf, ci);
1309 }
1310
1311
1312 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1313         Buffer const & buf, string const & style, CiteItem const & ci) const
1314 {
1315         size_t max_size = ci.max_size;
1316         // shorter makes no sense
1317         LASSERT(max_size >= 16, max_size = 16);
1318
1319         // we can't display more than 10 of these, anyway
1320         bool const too_many_keys = keys.size() > 10;
1321         if (too_many_keys)
1322                 keys.resize(10);
1323
1324         CiteEngineType const engine_type = buf.params().citeEngineType();
1325         DocumentClass const & dc = buf.params().documentClass();
1326         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1327         docstring ret = format;
1328         vector<docstring>::const_iterator key = keys.begin();
1329         vector<docstring>::const_iterator ken = keys.end();
1330         for (int i = 0; key != ken; ++key, ++i) {
1331                 BiblioInfo::const_iterator it = find(*key);
1332                 BibTeXInfo empty_data;
1333                 empty_data.key(*key);
1334                 BibTeXInfo & data = empty_data;
1335                 vector<BibTeXInfo const *> xrefptrs;
1336                 if (it != end()) {
1337                         data = it->second;
1338                         vector<docstring> const xrefs = getXRefs(data);
1339                         if (!xrefs.empty()) {
1340                                 vector<docstring>::const_iterator it = xrefs.begin();
1341                                 vector<docstring>::const_iterator en = xrefs.end();
1342                                 for (; it != en; ++it) {
1343                                         BiblioInfo::const_iterator const xrefit = find(*it);
1344                                         if (xrefit != end())
1345                                                 xrefptrs.push_back(&(xrefit->second));
1346                                 }
1347                         }
1348                 }
1349                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1350         }
1351
1352         if (too_many_keys)
1353                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1354         support::truncateWithEllipsis(ret, max_size);
1355         return ret;
1356 }
1357
1358
1359 bool BiblioInfo::isBibtex(docstring const & key) const
1360 {
1361         docstring key1;
1362         split(key, key1, ',');
1363         BiblioInfo::const_iterator it = find(key1);
1364         if (it == end())
1365                 return false;
1366         return it->second.isBibTeX();
1367 }
1368
1369
1370 vector<docstring> const BiblioInfo::getCiteStrings(
1371         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1372         Buffer const & buf, CiteItem const & ci) const
1373 {
1374         if (empty())
1375                 return vector<docstring>();
1376
1377         string style;
1378         vector<docstring> vec(styles.size());
1379         for (size_t i = 0; i != vec.size(); ++i) {
1380                 style = styles[i].name;
1381                 vec[i] = getLabel(keys, buf, style, ci);
1382         }
1383
1384         return vec;
1385 }
1386
1387
1388 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1389 {
1390         bimap_.insert(info.begin(), info.end());
1391         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1392         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1393 }
1394
1395
1396 namespace {
1397
1398 // used in xhtml to sort a list of BibTeXInfo objects
1399 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1400 {
1401         docstring const lauth = lhs->getAuthorOrEditorList();
1402         docstring const rauth = rhs->getAuthorOrEditorList();
1403         docstring const lyear = lhs->getYear();
1404         docstring const ryear = rhs->getYear();
1405         docstring const ltitl = lhs->operator[]("title");
1406         docstring const rtitl = rhs->operator[]("title");
1407         return  (lauth < rauth)
1408                 || (lauth == rauth && lyear < ryear)
1409                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1410 }
1411
1412 }
1413
1414
1415 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1416 {
1417         cited_entries_.clear();
1418         // We are going to collect all the citation keys used in the document,
1419         // getting them from the TOC.
1420         // FIXME We may want to collect these differently, in the first case,
1421         // so that we might have them in order of appearance.
1422         set<docstring> citekeys;
1423         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1424         Toc::const_iterator it = toc->begin();
1425         Toc::const_iterator const en = toc->end();
1426         for (; it != en; ++it) {
1427                 if (it->str().empty())
1428                         continue;
1429                 vector<docstring> const keys = getVectorFromString(it->str());
1430                 citekeys.insert(keys.begin(), keys.end());
1431         }
1432         if (citekeys.empty())
1433                 return;
1434
1435         // We have a set of the keys used in this document.
1436         // We will now convert it to a list of the BibTeXInfo objects used in
1437         // this document...
1438         vector<BibTeXInfo const *> bi;
1439         set<docstring>::const_iterator cit = citekeys.begin();
1440         set<docstring>::const_iterator const cen = citekeys.end();
1441         for (; cit != cen; ++cit) {
1442                 BiblioInfo::const_iterator const bt = find(*cit);
1443                 if (bt == end() || !bt->second.isBibTeX())
1444                         continue;
1445                 bi.push_back(&(bt->second));
1446         }
1447         // ...and sort it.
1448         sort(bi.begin(), bi.end(), lSorter);
1449
1450         // Now we can write the sorted keys
1451         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1452         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1453         for (; bit != ben; ++bit)
1454                 cited_entries_.push_back((*bit)->key());
1455 }
1456
1457
1458 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1459 {
1460         collectCitedEntries(buf);
1461         CiteEngineType const engine_type = buf.params().citeEngineType();
1462         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1463
1464         int keynumber = 0;
1465         char modifier = 0;
1466         // used to remember the last one we saw
1467         // we'll be comparing entries to see if we need to add
1468         // modifiers, like "1984a"
1469         map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1470
1471         vector<docstring>::const_iterator it = cited_entries_.begin();
1472         vector<docstring>::const_iterator const en = cited_entries_.end();
1473         for (; it != en; ++it) {
1474                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1475                 // this shouldn't happen, but...
1476                 if (biit == bimap_.end())
1477                         // ...fail gracefully, anyway.
1478                         continue;
1479                 BibTeXInfo & entry = biit->second;
1480                 if (numbers) {
1481                         docstring const num = convert<docstring>(++keynumber);
1482                         entry.setCiteNumber(num);
1483                 } else {
1484                         // The first test here is checking whether this is the first
1485                         // time through the loop. If so, then we do not have anything
1486                         // with which to compare.
1487                         if (last != bimap_.end()
1488                             && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1489                             // we access the year via getYear() so as to get it from the xref,
1490                             // if we need to do so
1491                             && getYear(entry.key()) == getYear(last->second.key())) {
1492                                 if (modifier == 0) {
1493                                         // so the last one should have been 'a'
1494                                         last->second.setModifier('a');
1495                                         modifier = 'b';
1496                                 } else if (modifier == 'z')
1497                                         modifier = 'A';
1498                                 else
1499                                         modifier++;
1500                         } else {
1501                                 modifier = 0;
1502                         }
1503                         entry.setModifier(modifier);
1504                         // remember the last one
1505                         last = biit;
1506                 }
1507         }
1508         // Set the labels
1509         it = cited_entries_.begin();
1510         for (; it != en; ++it) {
1511                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1512                 // this shouldn't happen, but...
1513                 if (biit == bimap_.end())
1514                         // ...fail gracefully, anyway.
1515                         continue;
1516                 BibTeXInfo & entry = biit->second;
1517                 if (numbers) {
1518                         entry.label(entry.citeNumber());
1519                 } else {
1520                         docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1521                         // we do it this way so as to access the xref, if necessary
1522                         // note that this also gives us the modifier
1523                         docstring const year = getYear(*it, buf, true);
1524                         if (!auth.empty() && !year.empty())
1525                                 entry.label(auth + ' ' + year);
1526                         else
1527                                 entry.label(entry.key());
1528                 }
1529         }
1530 }
1531
1532
1533 //////////////////////////////////////////////////////////////////////
1534 //
1535 // CitationStyle
1536 //
1537 //////////////////////////////////////////////////////////////////////
1538
1539
1540 CitationStyle citationStyleFromString(string const & command,
1541                                       BufferParams const & params)
1542 {
1543         CitationStyle cs;
1544         if (command.empty())
1545                 return cs;
1546
1547         string const alias = params.getCiteAlias(command);
1548         string cmd = alias.empty() ? command : alias;
1549         if (isUpperCase(command[0])) {
1550                 cs.forceUpperCase = true;
1551                 cmd[0] = lowercase(cmd[0]);
1552         }
1553
1554         size_t const n = command.size() - 1;
1555         if (command[n] == '*') {
1556                 cs.hasStarredVersion = true;
1557                 if (suffixIs(cmd, '*'))
1558                         cmd = cmd.substr(0, cmd.size() - 1);
1559         }
1560
1561         cs.name = cmd;
1562         return cs;
1563 }
1564
1565
1566 string citationStyleToString(const CitationStyle & cs, bool const latex)
1567 {
1568         string cmd = latex ? cs.cmd : cs.name;
1569         if (cs.forceUpperCase)
1570                 cmd[0] = uppercase(cmd[0]);
1571         if (cs.hasStarredVersion)
1572                 cmd += '*';
1573         return cmd;
1574 }
1575
1576 } // namespace lyx