src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Encoding.h"
  21 #include "InsetIterator.h"
  22 #include "Language.h"
  23 #include "output_xhtml.h"
  24 #include "Paragraph.h"
  25 #include "TextClass.h"
  26 #include "TocBackend.h"
  27
  28 #include "support/convert.h"
  29 #include "support/debug.h"
  30 #include "support/docstream.h"
  31 #include "support/gettext.h"
  32 #include "support/lassert.h"
  33 #include "support/lstrings.h"
  34 #include "support/regex.h"
  35 #include "support/textutils.h"
  36
  37 #include <set>
  38
  39 using namespace std;
  40 using namespace lyx::support;
  41
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 // gets the "family name" from an author-type string
  48 docstring familyName(docstring const & name)
  49 {
  50         if (name.empty())
  51                 return docstring();
  52
  53         // first we look for a comma, and take the last name to be everything
  54         // preceding the right-most one, so that we also get the "jr" part.
  55         docstring::size_type idx = name.rfind(',');
  56         if (idx != docstring::npos)
  57                 return ltrim(name.substr(0, idx));
  58
  59         // OK, so now we want to look for the last name. We're going to
  60         // include the "von" part. This isn't perfect.
  61         // Split on spaces, to get various tokens.
  62         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  63         // If we only get two, assume the last one is the last name
  64         if (pieces.size() <= 2)
  65                 return pieces.back();
  66
  67         // Now we look for the first token that begins with a lower case letter.
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator en = pieces.end();
  70         for (; it != en; ++it) {
  71                 if ((*it).empty())
  72                         continue;
  73                 char_type const c = (*it)[0];
  74                 if (isLower(c))
  75                         break;
  76         }
  77
  78         if (it == en) // we never found a "von"
  79                 return pieces.back();
  80
  81         // reconstruct what we need to return
  82         docstring retval;
  83         bool first = true;
  84         for (; it != en; ++it) {
  85                 if (!first)
  86                         retval += " ";
  87                 else
  88                         first = false;
  89                 retval += *it;
  90         }
  91         return retval;
  92 }
  93
  94
  95 // converts a string containing LaTeX commands into unicode
  96 // for display.
  97 docstring convertLaTeXCommands(docstring const & str)
  98 {
  99         docstring val = str;
 100         docstring ret;
 101
 102         bool scanning_cmd = false;
 103         bool scanning_math = false;
 104         bool escaped = false; // used to catch \$, etc.
 105         while (!val.empty()) {
 106                 char_type const ch = val[0];
 107
 108                 // if we're scanning math, we output everything until we
 109                 // find an unescaped $, at which point we break out.
 110                 if (scanning_math) {
 111                         if (escaped)
 112                                 escaped = false;
 113                         else if (ch == '\\')
 114                                 escaped = true;
 115                         else if (ch == '$')
 116                                 scanning_math = false;
 117                         ret += ch;
 118                         val = val.substr(1);
 119                         continue;
 120                 }
 121
 122                 // if we're scanning a command name, then we just
 123                 // discard characters until we hit something that
 124                 // isn't alpha.
 125                 if (scanning_cmd) {
 126                         if (isAlphaASCII(ch)) {
 127                                 val = val.substr(1);
 128                                 escaped = false;
 129                                 continue;
 130                         }
 131                         // so we're done with this command.
 132                         // now we fall through and check this character.
 133                         scanning_cmd = false;
 134                 }
 135
 136                 // was the last character a \? If so, then this is something like:
 137                 // \\ or \$, so we'll just output it. That's probably not always right...
 138                 if (escaped) {
 139                         // exception: output \, as THIN SPACE
 140                         if (ch == ',')
 141                                 ret.push_back(0x2009);
 142                         else
 143                                 ret += ch;
 144                         val = val.substr(1);
 145                         escaped = false;
 146                         continue;
 147                 }
 148
 149                 if (ch == '$') {
 150                         ret += ch;
 151                         val = val.substr(1);
 152                         scanning_math = true;
 153                         continue;
 154                 }
 155
 156                 // we just ignore braces
 157                 if (ch == '{' || ch == '}') {
 158                         val = val.substr(1);
 159                         continue;
 160                 }
 161
 162                 // we're going to check things that look like commands, so if
 163                 // this doesn't, just output it.
 164                 if (ch != '\\') {
 165                         ret += ch;
 166                         val = val.substr(1);
 167                         continue;
 168                 }
 169
 170                 // ok, could be a command of some sort
 171                 // let's see if it corresponds to some unicode
 172                 // unicodesymbols has things in the form: \"{u},
 173                 // whereas we may see things like: \"u. So we'll
 174                 // look for that and change it, if necessary.
 175                 // FIXME: This is a sort of mini-tex2lyx.
 176                 //        Use the real tex2lyx instead!
 177                 static lyx::regex const reg("^\\\\\\W\\w");
 178                 if (lyx::regex_search(to_utf8(val), reg)) {
 179                         val.insert(3, from_ascii("}"));
 180                         val.insert(2, from_ascii("{"));
 181                 }
 182                 bool termination;
 183                 docstring rem;
 184                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 185                                 Encodings::TEXT_CMD, termination, rem);
 186                 if (!cnvtd.empty()) {
 187                         // it did, so we'll take that bit and proceed with what's left
 188                         ret += cnvtd;
 189                         val = rem;
 190                         continue;
 191                 }
 192                 // it's a command of some sort
 193                 scanning_cmd = true;
 194                 escaped = true;
 195                 val = val.substr(1);
 196         }
 197         return ret;
 198 }
 199
 200
 201 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 202 docstring processRichtext(docstring const & str, bool richtext)
 203 {
 204         docstring val = str;
 205         docstring ret;
 206
 207         bool scanning_rich = false;
 208         while (!val.empty()) {
 209                 char_type const ch = val[0];
 210                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 211                         // beginning of rich text
 212                         scanning_rich = true;
 213                         val = val.substr(2);
 214                         continue;
 215                 }
 216                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 217                         // end of rich text
 218                         scanning_rich = false;
 219                         val = val.substr(2);
 220                         continue;
 221                 }
 222                 if (richtext) {
 223                         if (scanning_rich)
 224                                 ret += ch;
 225                         else {
 226                                 // we need to escape '<' and '>'
 227                                 if (ch == '<')
 228                                         ret += "&lt;";
 229                                 else if (ch == '>')
 230                                         ret += "&gt;";
 231                                 else
 232                                         ret += ch;
 233                         }
 234                 } else if (!scanning_rich /* && !richtext */)
 235                         ret += ch;
 236                 // else the character is discarded, which will happen only if
 237                 // richtext == false and we are scanning rich text
 238                 val = val.substr(1);
 239         }
 240         return ret;
 241 }
 242
 243 } // anon namespace
 244
 245
 246 //////////////////////////////////////////////////////////////////////
 247 //
 248 // BibTeXInfo
 249 //
 250 //////////////////////////////////////////////////////////////////////
 251
 252 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 253         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 254           modifier_(0)
 255 {}
 256
 257
 258 docstring const BibTeXInfo::getAbbreviatedAuthor(
 259     Buffer const * buf, bool jurabib_style) const
 260 {
 261         if (!is_bibtex_) {
 262                 docstring const opt = label();
 263                 if (opt.empty())
 264                         return docstring();
 265
 266                 docstring authors;
 267                 docstring const remainder = trim(split(opt, authors, '('));
 268                 if (remainder.empty())
 269                         // in this case, we didn't find a "(",
 270                         // so we don't have author (year)
 271                         return docstring();
 272                 return authors;
 273         }
 274
 275         docstring author = operator[]("author");
 276         if (author.empty()) {
 277                 author = operator[]("editor");
 278                 if (author.empty())
 279                         return author;
 280         }
 281
 282         // FIXME Move this to a separate routine that can
 283         // be called from elsewhere.
 284         //
 285         // OK, we've got some names. Let's format them.
 286         // Try to split the author list on " and "
 287         vector<docstring> const authors =
 288                 getVectorFromString(author, from_ascii(" and "));
 289
 290         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 291                 docstring shortauthor = familyName(authors[0])
 292                         + "/" + familyName(authors[1]);
 293                 if (authors.size() == 3)
 294                         shortauthor += "/" + familyName(authors[2]);
 295                 return convertLaTeXCommands(shortauthor);
 296         }
 297
 298         docstring retval = familyName(authors[0]);
 299
 300         if (authors.size() == 2 && authors[1] != "others") {
 301                 docstring const dformat = buf ?
 302                         buf->B_("%1$s and %2$s") : from_ascii("%1$s and %2$s");
 303                 retval = bformat(dformat, familyName(authors[0]), familyName(authors[1]));
 304         } else if (authors.size() >= 2) {
 305                 // we get here either if the author list is longer than two names
 306                 // or if the second 'name' is "others". we do the same thing either
 307                 // way.
 308                 docstring const dformat = buf ?
 309                         buf->B_("%1$s et al.") : from_ascii("%1$s et al.");
 310                 retval = bformat(dformat, familyName(authors[0]));
 311         }
 312
 313         return convertLaTeXCommands(retval);
 314 }
 315
 316
 317 docstring const BibTeXInfo::getYear() const
 318 {
 319         if (is_bibtex_) {
 320                 // first try legacy year field
 321                 docstring year = operator[]("year");
 322                 if (!year.empty())
 323                         return year;
 324                 // now try biblatex's date field
 325                 year = operator[]("date");
 326                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 327                 // We only want the years.
 328                 static regex const yreg("[-]?([\\d]{4}).*");
 329                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 330                 smatch sm;
 331                 string const date = to_utf8(year);
 332                 regex_match(date, sm, yreg);
 333                 year = from_ascii(sm[1]);
 334                 // check for an endyear
 335                 if (regex_match(date, sm, ereg))
 336                         year += char_type(0x2013) + from_ascii(sm[1]);
 337                 return year;
 338         }
 339
 340         docstring const opt = label();
 341         if (opt.empty())
 342                 return docstring();
 343
 344         docstring authors;
 345         docstring tmp = split(opt, authors, '(');
 346         if (tmp.empty())
 347                 // we don't have author (year)
 348                 return docstring();
 349         docstring year;
 350         tmp = split(tmp, year, ')');
 351         return year;
 352 }
 353
 354
 355 namespace {
 356
// Forward declaration: parseEmbeddedOption() below calls parseOptions(),
// which is defined further down in this anonymous namespace.
docstring parseOptions(docstring const & format, string & optkey,
                    docstring & ifpart, docstring & elsepart);
 359
 360 // Calls parseOptions to deal with an embedded option, such as:
 361 //   {%number%[[, no.~%number%]]}
 362 // which must appear at the start of format. ifelsepart gets the
 363 // whole of the option, and we return what's left after the option.
 364 // we return format if there is an error.
 365 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 366 {
 367         LASSERT(format[0] == '{' && format[1] == '%', return format);
 368         string optkey;
 369         docstring ifpart;
 370         docstring elsepart;
 371         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 372         if (format == rest) { // parse error
 373                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 374                 return format;
 375         }
 376         LASSERT(rest.size() <= format.size(),
 377                 { ifelsepart = docstring(); return format; });
 378         ifelsepart = format.substr(0, format.size() - rest.size());
 379         return rest;
 380 }
 381
 382
 383 // Gets a "clause" from a format string, where the clause is
 384 // delimited by '[[' and ']]'. Returns what is left after the
 385 // clause is removed, and returns format if there is an error.
 386 docstring getClause(docstring const & format, docstring & clause)
 387 {
 388         docstring fmt = format;
 389         // remove '[['
 390         fmt = fmt.substr(2);
 391         // we'll remove characters from the front of fmt as we
 392         // deal with them
 393         while (!fmt.empty()) {
 394                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 395                         // that's the end
 396                         fmt = fmt.substr(2);
 397                         break;
 398                 }
 399                 // check for an embedded option
 400                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 401                         docstring part;
 402                         docstring const rest = parseEmbeddedOption(fmt, part);
 403                         if (fmt == rest) {
 404                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 405                                 return format;
 406                         }
 407                         clause += part;
 408                         fmt = rest;
 409                 } else { // it's just a normal character
 410                                 clause += fmt[0];
 411                                 fmt = fmt.substr(1);
 412                 }
 413         }
 414         return fmt;
 415 }
 416
 417
 418 // parse an options string, which must appear at the start of the
 419 // format parameter. puts the parsed bits in optkey, ifpart, and
 420 // elsepart and returns what's left after the option is removed.
 421 // if there's an error, it returns format itself.
 422 docstring parseOptions(docstring const & format, string & optkey,
 423                     docstring & ifpart, docstring & elsepart)
 424 {
 425         LASSERT(format[0] == '{' && format[1] == '%', return format);
 426         // strip '{%'
 427         docstring fmt = format.substr(2);
 428         size_t pos = fmt.find('%'); // end of key
 429         if (pos == string::npos) {
 430                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 431                 return format;
 432         }
 433         optkey = to_utf8(fmt.substr(0, pos));
 434         fmt = fmt.substr(pos + 1);
 435         // [[format]] should be next
 436         if (fmt[0] != '[' || fmt[1] != '[') {
 437                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 438                 return format;
 439         }
 440
 441         docstring curfmt = fmt;
 442         fmt = getClause(curfmt, ifpart);
 443         if (fmt == curfmt) {
 444                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 445                 return format;
 446         }
 447
 448         if (fmt[0] == '}') // we're done, no else clause
 449                 return fmt.substr(1);
 450
 451         // else part should follow
 452         if (fmt[0] != '[' || fmt[1] != '[') {
 453                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 454                 return format;
 455         }
 456
 457         curfmt = fmt;
 458         fmt = getClause(curfmt, elsepart);
 459         // we should be done
 460         if (fmt == curfmt || fmt[0] != '}') {
 461                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 462                 return format;
 463         }
 464         return fmt.substr(1);
 465 }
 466
 467
 468 } // anon namespace
 469
 470 /* FIXME
 471 Bug #9131 revealed an oddity in how we are generating citation information
 472 when more than one key is given. We end up building a longer and longer format
 473 string as we go, which we then have to re-parse, over and over and over again,
 474 rather than generating the information for the individual keys and then putting
 475 all of that together. We do that to deal with the way separators work, from what
 476 I can tell, but it still feels like a hack. Fixing this would require quite a
 477 bit of work, however.
 478 */
// Expands a citation format string for this entry.
//
// format is scanned from the front, one character at a time:
//  - %key% is replaced by a value. A key starting with '!' names a macro
//    whose definition is spliced back into the text still to be scanned;
//    a key starting with '_' names a translatable string; any other key
//    is looked up with getValueForKey and, unless we are already inside
//    rich text, wrapped in {!<span class="bib-KEY">!} ... {!</span>!}.
//  - {%key%[[if]][[else]]} is a conditional: the if clause is expanded
//    when the key has a value, the else clause (if any) otherwise. For
//    the key "next" with next set, the if clause is output unexpanded.
//  - {! and !} delimit rich text; the markers are copied to the output.
//  - every other character is copied to the output.
//
// counter is incremented for every macro expansion and the expansion is
// abandoned once it exceeds max_passes. before, after and dialog are
// handed on to getValueForKey, as are the cross-referenced entries xrefs.
// Returns the expanded string, or the translated string "ERROR!" if the
// format is malformed or the pass limit is reached.
docstring BibTeXInfo::expandFormat(docstring const & format,
                BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
                docstring before, docstring after, docstring dialog, bool next) const
{
        // incorrect use of macros could put us in an infinite loop
        static int const max_passes = 5000;
        // the use of overly large keys can lead to performance problems, due
        // to eventual attempts to convert LaTeX macros to unicode. See bug
        // #8944. This is perhaps not the best solution, but it will have to
        // do for now.
        static size_t const max_keysize = 128;
        odocstringstream ret; // return value
        string key;           // name of the key currently being scanned
        bool scanning_key = false;  // between the two '%' of a key
        bool scanning_rich = false; // between "{!" and "!}"

        CiteEngineType const engine_type = buf.params().citeEngineType();
        docstring fmt = format;
        // we'll remove characters from the front of fmt as we
        // deal with them
        while (!fmt.empty()) {
                if (counter > max_passes) {
                        LYXERR0("Recursion limit reached while parsing `"
                                << format << "'.");
                        return _("ERROR!");
                }

                char_type thischar = fmt[0];
                if (thischar == '%') {
                        // beginning or end of key
                        if (scanning_key) {
                                // end of key
                                scanning_key = false;
                                // so we replace the key with its value, which may be empty
                                if (key[0] == '!') {
                                        // macro: its definition replaces the closing '%'
                                        // at the front of fmt and is scanned next
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        fmt = from_utf8(val) + fmt.substr(1);
                                        counter += 1;
                                        continue;
                                } else if (key[0] == '_') {
                                        // a translatable bit
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val), buf.params().language->code());
                                        ret << trans;
                                } else {
                                        // an ordinary key; the value is limited to max_keysize
                                        docstring const val =
                                                getValueForKey(key, buf, before, after, dialog, xrefs, max_keysize);
                                        // tag the value with a span named after the key,
                                        // unless we are inside rich text already
                                        if (!scanning_rich)
                                                ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
                                        ret << val;
                                        if (!scanning_rich)
                                                ret << from_ascii("{!</span>!}");
                                }
                        } else {
                                // beginning of key
                                key.clear();
                                scanning_key = true;
                        }
                }
                else if (thischar == '{') {
                        // beginning of option?
                        if (scanning_key) {
                                LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
                                return _("ERROR!");
                        }
                        if (fmt.size() > 1) {
                                if (fmt[1] == '%') {
                                        // it is the beginning of an optional format
                                        string optkey;
                                        docstring ifpart;
                                        docstring elsepart;
                                        docstring const newfmt =
                                                parseOptions(fmt, optkey, ifpart, elsepart);
                                        if (newfmt == fmt) // parse error
                                                return _("ERROR!");
                                        fmt = newfmt;
                                        docstring const val =
                                                getValueForKey(optkey, buf, before, after, dialog, xrefs);
                                        if (optkey == "next" && next)
                                                ret << ifpart; // without expansion
                                        else if (!val.empty()) {
                                                // the clause is expanded with a pass
                                                // counter of its own
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        before, after, dialog, next);
                                        } else if (!elsepart.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(elsepart, xrefs, newcounter, buf,
                                                        before, after, dialog, next);
                                        }
                                        // fmt will have been shortened for us already
                                        continue;
                                }
                                if (fmt[1] == '!') {
                                        // beginning of rich text
                                        scanning_rich = true;
                                        fmt = fmt.substr(2);
                                        ret << from_ascii("{!");
                                        continue;
                                }
                        }
                        // we are here if '{' was not followed by % or !.
                        // So it's just a character.
                        ret << thischar;
                }
                else if (scanning_rich && thischar == '!'
                         && fmt.size() > 1 && fmt[1] == '}') {
                        // end of rich text
                        scanning_rich = false;
                        fmt = fmt.substr(2);
                        ret << from_ascii("!}");
                        continue;
                }
                else if (scanning_key)
                        // part of a key name
                        key += char(thischar);
                else {
                        // an ordinary character; one that cannot be encoded is
                        // reported and left out
                        try {
                                ret.put(thischar);
                        } catch (EncodingException & /* e */) {
                                LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
                        }
                }
                // done with this character
                fmt = fmt.substr(1);
        } // while loop
        if (scanning_key) {
                LYXERR0("Never found end of key in `" << format << "'!");
                return _("ERROR!");
        }
        if (scanning_rich) {
                LYXERR0("Never found end of rich text in `" << format << "'!");
                return _("ERROR!");
        }
        return ret.str();
}
 616
 617
 618 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 619         Buffer const & buf, bool richtext) const
 620 {
 621         if (!richtext && !info_.empty())
 622                 return info_;
 623         if (richtext && !info_richtext_.empty())
 624                 return info_richtext_;
 625
 626         if (!is_bibtex_) {
 627                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 628                 info_ = it->second;
 629                 return info_;
 630         }
 631
 632         CiteEngineType const engine_type = buf.params().citeEngineType();
 633         DocumentClass const & dc = buf.params().documentClass();
 634         docstring const & format =
 635                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 636         int counter = 0;
 637         info_ = expandFormat(format, xrefs, counter, buf,
 638                 docstring(), docstring(), docstring(), false);
 639
 640         if (info_.empty()) {
 641                 // this probably shouldn't happen
 642                 return info_;
 643         }
 644
 645         if (richtext) {
 646                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 647                 return info_richtext_;
 648         }
 649
 650         info_ = convertLaTeXCommands(processRichtext(info_, false));
 651         return info_;
 652 }
 653
 654
 655 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 656         Buffer const & buf, docstring const & format, bool richtext,
 657         docstring const & before, docstring const & after,
 658         docstring const & dialog, bool next) const
 659 {
 660         docstring loclabel;
 661
 662         int counter = 0;
 663         loclabel = expandFormat(format, xrefs, counter, buf,
 664                 before, after, dialog, next);
 665
 666         if (!loclabel.empty() && !next) {
 667                 loclabel = processRichtext(loclabel, richtext);
 668                 loclabel = convertLaTeXCommands(loclabel);
 669         }
 670
 671         return loclabel;
 672 }
 673
 674
 675 docstring const & BibTeXInfo::operator[](docstring const & field) const
 676 {
 677         BibTeXInfo::const_iterator it = find(field);
 678         if (it != end())
 679                 return it->second;
 680         static docstring const empty_value = docstring();
 681         return empty_value;
 682 }
 683
 684
 685 docstring const & BibTeXInfo::operator[](string const & field) const
 686 {
 687         return operator[](from_ascii(field));
 688 }
 689
 690
 691 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 692         docstring const & before, docstring const & after, docstring const & dialog,
 693         BibTeXInfoList const xrefs, size_t maxsize) const
 694 {
 695         // anything less is pointless
 696         LASSERT(maxsize >= 16, maxsize = 16);
 697         string key = oldkey;
 698         bool cleanit = false;
 699         if (prefixIs(oldkey, "clean:")) {
 700                 key = oldkey.substr(6);
 701                 cleanit = true;
 702         }
 703
 704         docstring ret = operator[](key);
 705         if (ret.empty() && !xrefs.empty()) {
 706                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 707                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 708                 for (; it != en; ++it) {
 709                         if (*it && !(**it)[key].empty()) {
 710                                 ret = (**it)[key];
 711                                 break;
 712                         }
 713                 }
 714         }
 715         if (ret.empty()) {
 716                 // some special keys
 717                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 718                 if (key == "dialog")
 719                         ret = dialog;
 720                 else if (key == "entrytype")
 721                         ret = entry_type_;
 722                 else if (key == "key")
 723                         ret = bib_key_;
 724                 else if (key == "label")
 725                         ret = label_;
 726                 else if (key == "modifier" && modifier_ != 0)
 727                         ret = modifier_;
 728                 else if (key == "numericallabel")
 729                         ret = cite_number_;
 730                 else if (key == "abbrvauthor")
 731                         // Special key to provide abbreviated author names.
 732                         ret = getAbbreviatedAuthor(&buf, false);
 733                 else if (key == "shortauthor")
 734                         // When shortauthor is not defined, jurabib automatically
 735                         // provides jurabib-style abbreviated author names. We do
 736                         // this as well.
 737                         ret = getAbbreviatedAuthor(&buf, true);
 738                 else if (key == "shorttitle") {
 739                         // When shorttitle is not defined, jurabib uses for `article'
 740                         // and `periodical' entries the form `journal volume [year]'
 741                         // and for other types of entries it uses the `title' field.
 742                         if (entry_type_ == "article" || entry_type_ == "periodical")
 743                                 ret = operator[]("journal") + " " + operator[]("volume")
 744                                         + " [" + operator[]("year") + "]";
 745                         else
 746                                 ret = operator[]("title");
 747                 } else if (key == "bibentry") {
 748                         // Special key to provide the full bibliography entry: see getInfo()
 749                         CiteEngineType const engine_type = buf.params().citeEngineType();
 750                         DocumentClass const & dc = buf.params().documentClass();
 751                         docstring const & format =
 752                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 753                         int counter = 0;
 754                         ret = expandFormat(format, xrefs, counter, buf,
 755                                 docstring(), docstring(), docstring(), false);
 756                 } else if (key == "textbefore")
 757                         ret = before;
 758                 else if (key == "textafter")
 759                         ret = after;
 760                 else if (key == "year")
 761                         ret = getYear();
 762         }
 763
 764         if (cleanit)
 765                 ret = html::cleanAttr(ret);
 766
 767         // make sure it is not too big
 768         support::truncateWithEllipsis(ret, maxsize);
 769         return ret;
 770 }
 771
 772
 773 //////////////////////////////////////////////////////////////////////
 774 //
 775 // BiblioInfo
 776 //
 777 //////////////////////////////////////////////////////////////////////
 778
 779 namespace {
 780
 781 // A functor for use with sort, leading to case insensitive sorting
 782 class compareNoCase: public binary_function<docstring, docstring, bool>
 783 {
 784 public:
 785         bool operator()(docstring const & s1, docstring const & s2) const {
 786                 return compare_no_case(s1, s2) < 0;
 787         }
 788 };
 789
 790 } // namespace anon
 791
 792
 793 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
 794 {
 795         vector<docstring> result;
 796         if (!data.isBibTeX())
 797                 return result;
 798         // Legacy crossref field. This is not nestable.
 799         if (!nested && !data["crossref"].empty()) {
 800                 docstring const xrefkey = data["crossref"];
 801                 result.push_back(xrefkey);
 802                 // However, check for nested xdatas
 803                 BiblioInfo::const_iterator it = find(xrefkey);
 804                 if (it != end()) {
 805                         BibTeXInfo const & xref = it->second;
 806                         vector<docstring> const nxdata = getXRefs(xref, true);
 807                         if (!nxdata.empty())
 808                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
 809                 }
 810         }
 811         // Biblatex's xdata field. Infinitely nestable.
 812         // XData field can consist of a comma-separated list of keys
 813         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
 814         if (!xdatakeys.empty()) {
 815                 vector<docstring>::const_iterator xit = xdatakeys.begin();
 816                 vector<docstring>::const_iterator xen = xdatakeys.end();
 817                 for (; xit != xen; ++xit) {
 818                         docstring const xdatakey = *xit;
 819                         result.push_back(xdatakey);
 820                         BiblioInfo::const_iterator it = find(xdatakey);
 821                         if (it != end()) {
 822                                 BibTeXInfo const & xdata = it->second;
 823                                 vector<docstring> const nxdata = getXRefs(xdata, true);
 824                                 if (!nxdata.empty())
 825                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
 826                         }
 827                 }
 828         }
 829         return result;
 830 }
 831
 832
 833 vector<docstring> const BiblioInfo::getKeys() const
 834 {
 835         vector<docstring> bibkeys;
 836         BiblioInfo::const_iterator it  = begin();
 837         for (; it != end(); ++it)
 838                 bibkeys.push_back(it->first);
 839         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 840         return bibkeys;
 841 }
 842
 843
 844 vector<docstring> const BiblioInfo::getFields() const
 845 {
 846         vector<docstring> bibfields;
 847         set<docstring>::const_iterator it = field_names_.begin();
 848         set<docstring>::const_iterator end = field_names_.end();
 849         for (; it != end; ++it)
 850                 bibfields.push_back(*it);
 851         sort(bibfields.begin(), bibfields.end());
 852         return bibfields;
 853 }
 854
 855
 856 vector<docstring> const BiblioInfo::getEntries() const
 857 {
 858         vector<docstring> bibentries;
 859         set<docstring>::const_iterator it = entry_types_.begin();
 860         set<docstring>::const_iterator end = entry_types_.end();
 861         for (; it != end; ++it)
 862                 bibentries.push_back(*it);
 863         sort(bibentries.begin(), bibentries.end());
 864         return bibentries;
 865 }
 866
 867
 868 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 869 {
 870         BiblioInfo::const_iterator it = find(key);
 871         if (it == end())
 872                 return docstring();
 873         BibTeXInfo const & data = it->second;
 874         return data.getAbbreviatedAuthor(&buf, false);
 875 }
 876
 877
 878 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 879 {
 880         BiblioInfo::const_iterator it = find(key);
 881         if (it == end())
 882                 return docstring();
 883         BibTeXInfo const & data = it->second;
 884         return data.citeNumber();
 885 }
 886
 887
 888 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 889 {
 890         BiblioInfo::const_iterator it = find(key);
 891         if (it == end())
 892                 return docstring();
 893         BibTeXInfo const & data = it->second;
 894         docstring year = data.getYear();
 895         if (year.empty()) {
 896                 // let's try the crossrefs
 897                 vector<docstring> const xrefs = getXRefs(data);
 898                 if (xrefs.empty())
 899                         // no luck
 900                         return docstring();
 901                 vector<docstring>::const_iterator it = xrefs.begin();
 902                 vector<docstring>::const_iterator en = xrefs.end();
 903                 for (; it != en; ++it) {
 904                         BiblioInfo::const_iterator const xrefit = find(*it);
 905                         if (xrefit == end())
 906                                 continue;
 907                         BibTeXInfo const & xref_data = xrefit->second;
 908                         year = xref_data.getYear();
 909                         if (!year.empty())
 910                                 // success!
 911                                 break;
 912                 }
 913         }
 914         if (use_modifier && data.modifier() != 0)
 915                 year += data.modifier();
 916         return year;
 917 }
 918
 919
 920 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 921 {
 922         docstring const year = getYear(key, use_modifier);
 923         if (year.empty())
 924                 return buf.B_("No year");
 925         return year;
 926 }
 927
 928
 929 docstring const BiblioInfo::getInfo(docstring const & key,
 930         Buffer const & buf, bool richtext) const
 931 {
 932         BiblioInfo::const_iterator it = find(key);
 933         if (it == end())
 934                 return docstring(_("Bibliography entry not found!"));
 935         BibTeXInfo const & data = it->second;
 936         BibTeXInfoList xrefptrs;
 937         vector<docstring> const xrefs = getXRefs(data);
 938         if (!xrefs.empty()) {
 939                 vector<docstring>::const_iterator it = xrefs.begin();
 940                 vector<docstring>::const_iterator en = xrefs.end();
 941                 for (; it != en; ++it) {
 942                         BiblioInfo::const_iterator const xrefit = find(*it);
 943                         if (xrefit != end())
 944                                 xrefptrs.push_back(&(xrefit->second));
 945                 }
 946         }
 947         return data.getInfo(xrefptrs, buf, richtext);
 948 }
 949
 950
 951 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 952         Buffer const & buf, string const & style, bool for_xhtml,
 953         size_t max_size, docstring const & before, docstring const & after,
 954         docstring const & dialog) const
 955 {
 956         // shorter makes no sense
 957         LASSERT(max_size >= 16, max_size = 16);
 958
 959         // we can't display more than 10 of these, anyway
 960         bool const too_many_keys = keys.size() > 10;
 961         if (too_many_keys)
 962                 keys.resize(10);
 963
 964         CiteEngineType const engine_type = buf.params().citeEngineType();
 965         DocumentClass const & dc = buf.params().documentClass();
 966         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 967         docstring ret = format;
 968         vector<docstring>::const_iterator key = keys.begin();
 969         vector<docstring>::const_iterator ken = keys.end();
 970         for (; key != ken; ++key) {
 971                 BiblioInfo::const_iterator it = find(*key);
 972                 BibTeXInfo empty_data;
 973                 empty_data.key(*key);
 974                 BibTeXInfo & data = empty_data;
 975                 vector<BibTeXInfo const *> xrefptrs;
 976                 if (it != end()) {
 977                         data = it->second;
 978                         vector<docstring> const xrefs = getXRefs(data);
 979                         if (!xrefs.empty()) {
 980                                 vector<docstring>::const_iterator it = xrefs.begin();
 981                                 vector<docstring>::const_iterator en = xrefs.end();
 982                                 for (; it != en; ++it) {
 983                                         BiblioInfo::const_iterator const xrefit = find(*it);
 984                                         if (xrefit != end())
 985                                                 xrefptrs.push_back(&(xrefit->second));
 986                                 }
 987                         }
 988                 }
 989                 ret = data.getLabel(xrefptrs, buf, ret, for_xhtml,
 990                         before, after, dialog, key + 1 != ken);
 991         }
 992
 993         if (too_many_keys)
 994                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
 995         support::truncateWithEllipsis(ret, max_size);
 996         return ret;
 997 }
 998
 999
1000 bool BiblioInfo::isBibtex(docstring const & key) const
1001 {
1002         docstring key1;
1003         split(key, key1, ',');
1004         BiblioInfo::const_iterator it = find(key1);
1005         if (it == end())
1006                 return false;
1007         return it->second.isBibTeX();
1008 }
1009
1010
1011 vector<docstring> const BiblioInfo::getCiteStrings(
1012         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1013         Buffer const & buf, docstring const & before,
1014         docstring const & after, docstring const & dialog, size_t max_size) const
1015 {
1016         if (empty())
1017                 return vector<docstring>();
1018
1019         string style;
1020         vector<docstring> vec(styles.size());
1021         for (size_t i = 0; i != vec.size(); ++i) {
1022                 style = styles[i].cmd;
1023                 vec[i] = getLabel(keys, buf, style, false, max_size, before, after, dialog);
1024         }
1025
1026         return vec;
1027 }
1028
1029
1030 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1031 {
1032         bimap_.insert(info.begin(), info.end());
1033         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1034         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1035 }
1036
1037
1038 namespace {
1039
1040 // used in xhtml to sort a list of BibTeXInfo objects
1041 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1042 {
1043         docstring const lauth = lhs->getAbbreviatedAuthor();
1044         docstring const rauth = rhs->getAbbreviatedAuthor();
1045         docstring const lyear = lhs->getYear();
1046         docstring const ryear = rhs->getYear();
1047         docstring const ltitl = lhs->operator[]("title");
1048         docstring const rtitl = rhs->operator[]("title");
1049         return  (lauth < rauth)
1050                 || (lauth == rauth && lyear < ryear)
1051                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1052 }
1053
1054 }
1055
1056
1057 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1058 {
1059         cited_entries_.clear();
1060         // We are going to collect all the citation keys used in the document,
1061         // getting them from the TOC.
1062         // FIXME We may want to collect these differently, in the first case,
1063         // so that we might have them in order of appearance.
1064         set<docstring> citekeys;
1065         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1066         Toc::const_iterator it = toc->begin();
1067         Toc::const_iterator const en = toc->end();
1068         for (; it != en; ++it) {
1069                 if (it->str().empty())
1070                         continue;
1071                 vector<docstring> const keys = getVectorFromString(it->str());
1072                 citekeys.insert(keys.begin(), keys.end());
1073         }
1074         if (citekeys.empty())
1075                 return;
1076
1077         // We have a set of the keys used in this document.
1078         // We will now convert it to a list of the BibTeXInfo objects used in
1079         // this document...
1080         vector<BibTeXInfo const *> bi;
1081         set<docstring>::const_iterator cit = citekeys.begin();
1082         set<docstring>::const_iterator const cen = citekeys.end();
1083         for (; cit != cen; ++cit) {
1084                 BiblioInfo::const_iterator const bt = find(*cit);
1085                 if (bt == end() || !bt->second.isBibTeX())
1086                         continue;
1087                 bi.push_back(&(bt->second));
1088         }
1089         // ...and sort it.
1090         sort(bi.begin(), bi.end(), lSorter);
1091
1092         // Now we can write the sorted keys
1093         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1094         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1095         for (; bit != ben; ++bit)
1096                 cited_entries_.push_back((*bit)->key());
1097 }
1098
1099
1100 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1101 {
1102         collectCitedEntries(buf);
1103         CiteEngineType const engine_type = buf.params().citeEngineType();
1104         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1105
1106         int keynumber = 0;
1107         char modifier = 0;
1108         // used to remember the last one we saw
1109         // we'll be comparing entries to see if we need to add
1110         // modifiers, like "1984a"
1111         map<docstring, BibTeXInfo>::iterator last;
1112
1113         vector<docstring>::const_iterator it = cited_entries_.begin();
1114         vector<docstring>::const_iterator const en = cited_entries_.end();
1115         for (; it != en; ++it) {
1116                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1117                 // this shouldn't happen, but...
1118                 if (biit == bimap_.end())
1119                         // ...fail gracefully, anyway.
1120                         continue;
1121                 BibTeXInfo & entry = biit->second;
1122                 if (numbers) {
1123                         docstring const num = convert<docstring>(++keynumber);
1124                         entry.setCiteNumber(num);
1125                 } else {
1126                         // coverity complains about our derefercing the iterator last,
1127                         // which was not initialized above. but it does get initialized
1128                         // after the first time through the loop, which is the point of
1129                         // the first test.
1130                         // coverity[FORWARD_NULL]
1131                         if (it != cited_entries_.begin()
1132                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1133                             // we access the year via getYear() so as to get it from the xref,
1134                             // if we need to do so
1135                             && getYear(entry.key()) == getYear(last->second.key())) {
1136                                 if (modifier == 0) {
1137                                         // so the last one should have been 'a'
1138                                         last->second.setModifier('a');
1139                                         modifier = 'b';
1140                                 } else if (modifier == 'z')
1141                                         modifier = 'A';
1142                                 else
1143                                         modifier++;
1144                         } else {
1145                                 modifier = 0;
1146                         }
1147                         entry.setModifier(modifier);
1148                         // remember the last one
1149                         last = biit;
1150                 }
1151         }
1152         // Set the labels
1153         it = cited_entries_.begin();
1154         for (; it != en; ++it) {
1155                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1156                 // this shouldn't happen, but...
1157                 if (biit == bimap_.end())
1158                         // ...fail gracefully, anyway.
1159                         continue;
1160                 BibTeXInfo & entry = biit->second;
1161                 if (numbers) {
1162                         entry.label(entry.citeNumber());
1163                 } else {
1164                         docstring const auth = entry.getAbbreviatedAuthor(&buf, false);
1165                         // we do it this way so as to access the xref, if necessary
1166                         // note that this also gives us the modifier
1167                         docstring const year = getYear(*it, buf, true);
1168                         if (!auth.empty() && !year.empty())
1169                                 entry.label(auth + ' ' + year);
1170                         else
1171                                 entry.label(entry.key());
1172                 }
1173         }
1174 }
1175
1176
1177 //////////////////////////////////////////////////////////////////////
1178 //
1179 // CitationStyle
1180 //
1181 //////////////////////////////////////////////////////////////////////
1182
1183
1184 CitationStyle citationStyleFromString(string const & command)
1185 {
1186         CitationStyle cs;
1187         if (command.empty())
1188                 return cs;
1189
1190         string cmd = command;
1191         if (cmd[0] == 'C') {
1192                 cs.forceUpperCase = true;
1193                 cmd[0] = 'c';
1194         }
1195
1196         size_t const n = cmd.size() - 1;
1197         if (cmd[n] == '*') {
1198                 cs.fullAuthorList = true;
1199                 cmd = cmd.substr(0, n);
1200         }
1201
1202         cs.cmd = cmd;
1203         return cs;
1204 }
1205
1206
1207 string citationStyleToString(const CitationStyle & cs)
1208 {
1209         string cmd = cs.cmd;
1210         if (cs.forceUpperCase)
1211                 cmd[0] = uppercase(cmd[0]);
1212         if (cs.fullAuthorList)
1213                 cmd += '*';
1214         return cmd;
1215 }
1216
1217 } // namespace lyx