src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Citation.h"
  21 #include "Encoding.h"
  22 #include "InsetIterator.h"
  23 #include "Language.h"
  24 #include "output_xhtml.h"
  25 #include "Paragraph.h"
  26 #include "TextClass.h"
  27 #include "TocBackend.h"
  28
  29 #include "support/convert.h"
  30 #include "support/debug.h"
  31 #include "support/docstream.h"
  32 #include "support/gettext.h"
  33 #include "support/lassert.h"
  34 #include "support/lstrings.h"
  35 #include "support/regex.h"
  36 #include "support/textutils.h"
  37
  38 #include <set>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43
  44 namespace lyx {
  45
  46 namespace {
  47
  48 // gets the "family name" from an author-type string
  49 docstring familyName(docstring const & name)
  50 {
  51         if (name.empty())
  52                 return docstring();
  53
  54         // first we look for a comma, and take the last name to be everything
  55         // preceding the right-most one, so that we also get the "jr" part.
  56         docstring::size_type idx = name.rfind(',');
  57         if (idx != docstring::npos)
  58                 return ltrim(name.substr(0, idx));
  59
  60         // OK, so now we want to look for the last name. We're going to
  61         // include the "von" part. This isn't perfect.
  62         // Split on spaces, to get various tokens.
  63         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  64         // If we only get two, assume the last one is the last name
  65         if (pieces.size() <= 2)
  66                 return pieces.back();
  67
  68         // Now we look for the first token that begins with a lower case letter.
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator en = pieces.end();
  71         for (; it != en; ++it) {
  72                 if ((*it).empty())
  73                         continue;
  74                 char_type const c = (*it)[0];
  75                 if (isLower(c))
  76                         break;
  77         }
  78
  79         if (it == en) // we never found a "von"
  80                 return pieces.back();
  81
  82         // reconstruct what we need to return
  83         docstring retval;
  84         bool first = true;
  85         for (; it != en; ++it) {
  86                 if (!first)
  87                         retval += " ";
  88                 else
  89                         first = false;
  90                 retval += *it;
  91         }
  92         return retval;
  93 }
  94
  95
  96 // converts a string containing LaTeX commands into unicode
  97 // for display.
  98 docstring convertLaTeXCommands(docstring const & str)
  99 {
 100         docstring val = str;
 101         docstring ret;
 102
 103         bool scanning_cmd = false;
 104         bool scanning_math = false;
 105         bool escaped = false; // used to catch \$, etc.
 106         while (!val.empty()) {
 107                 char_type const ch = val[0];
 108
 109                 // if we're scanning math, we output everything until we
 110                 // find an unescaped $, at which point we break out.
 111                 if (scanning_math) {
 112                         if (escaped)
 113                                 escaped = false;
 114                         else if (ch == '\\')
 115                                 escaped = true;
 116                         else if (ch == '$')
 117                                 scanning_math = false;
 118                         ret += ch;
 119                         val = val.substr(1);
 120                         continue;
 121                 }
 122
 123                 // if we're scanning a command name, then we just
 124                 // discard characters until we hit something that
 125                 // isn't alpha.
 126                 if (scanning_cmd) {
 127                         if (isAlphaASCII(ch)) {
 128                                 val = val.substr(1);
 129                                 escaped = false;
 130                                 continue;
 131                         }
 132                         // so we're done with this command.
 133                         // now we fall through and check this character.
 134                         scanning_cmd = false;
 135                 }
 136
 137                 // was the last character a \? If so, then this is something like:
 138                 // \\ or \$, so we'll just output it. That's probably not always right...
 139                 if (escaped) {
 140                         // exception: output \, as THIN SPACE
 141                         if (ch == ',')
 142                                 ret.push_back(0x2009);
 143                         else
 144                                 ret += ch;
 145                         val = val.substr(1);
 146                         escaped = false;
 147                         continue;
 148                 }
 149
 150                 if (ch == '$') {
 151                         ret += ch;
 152                         val = val.substr(1);
 153                         scanning_math = true;
 154                         continue;
 155                 }
 156
 157                 // we just ignore braces
 158                 if (ch == '{' || ch == '}') {
 159                         val = val.substr(1);
 160                         continue;
 161                 }
 162
 163                 // we're going to check things that look like commands, so if
 164                 // this doesn't, just output it.
 165                 if (ch != '\\') {
 166                         ret += ch;
 167                         val = val.substr(1);
 168                         continue;
 169                 }
 170
 171                 // ok, could be a command of some sort
 172                 // let's see if it corresponds to some unicode
 173                 // unicodesymbols has things in the form: \"{u},
 174                 // whereas we may see things like: \"u. So we'll
 175                 // look for that and change it, if necessary.
 176                 // FIXME: This is a sort of mini-tex2lyx.
 177                 //        Use the real tex2lyx instead!
 178                 static lyx::regex const reg("^\\\\\\W\\w");
 179                 if (lyx::regex_search(to_utf8(val), reg)) {
 180                         val.insert(3, from_ascii("}"));
 181                         val.insert(2, from_ascii("{"));
 182                 }
 183                 bool termination;
 184                 docstring rem;
 185                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 186                                 Encodings::TEXT_CMD, termination, rem);
 187                 if (!cnvtd.empty()) {
 188                         // it did, so we'll take that bit and proceed with what's left
 189                         ret += cnvtd;
 190                         val = rem;
 191                         continue;
 192                 }
 193                 // it's a command of some sort
 194                 scanning_cmd = true;
 195                 escaped = true;
 196                 val = val.substr(1);
 197         }
 198         return ret;
 199 }
 200
 201
 202 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 203 docstring processRichtext(docstring const & str, bool richtext)
 204 {
 205         docstring val = str;
 206         docstring ret;
 207
 208         bool scanning_rich = false;
 209         while (!val.empty()) {
 210                 char_type const ch = val[0];
 211                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 212                         // beginning of rich text
 213                         scanning_rich = true;
 214                         val = val.substr(2);
 215                         continue;
 216                 }
 217                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 218                         // end of rich text
 219                         scanning_rich = false;
 220                         val = val.substr(2);
 221                         continue;
 222                 }
 223                 if (richtext) {
 224                         if (scanning_rich)
 225                                 ret += ch;
 226                         else {
 227                                 // we need to escape '<' and '>'
 228                                 if (ch == '<')
 229                                         ret += "&lt;";
 230                                 else if (ch == '>')
 231                                         ret += "&gt;";
 232                                 else
 233                                         ret += ch;
 234                         }
 235                 } else if (!scanning_rich /* && !richtext */)
 236                         ret += ch;
 237                 // else the character is discarded, which will happen only if
 238                 // richtext == false and we are scanning rich text
 239                 val = val.substr(1);
 240         }
 241         return ret;
 242 }
 243
 244 } // anon namespace
 245
 246
 247 //////////////////////////////////////////////////////////////////////
 248 //
 249 // BibTeXInfo
 250 //
 251 //////////////////////////////////////////////////////////////////////
 252
 253 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 254         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 255           modifier_(0)
 256 {}
 257
 258
 259 docstring const BibTeXInfo::getAbbreviatedAuthor(
 260     Buffer const * buf, bool jurabib_style) const
 261 {
 262         if (!is_bibtex_) {
 263                 docstring const opt = label();
 264                 if (opt.empty())
 265                         return docstring();
 266
 267                 docstring authors;
 268                 docstring const remainder = trim(split(opt, authors, '('));
 269                 if (remainder.empty())
 270                         // in this case, we didn't find a "(",
 271                         // so we don't have author (year)
 272                         return docstring();
 273                 return authors;
 274         }
 275
 276         docstring author = operator[]("author");
 277         if (author.empty()) {
 278                 author = operator[]("editor");
 279                 if (author.empty())
 280                         return author;
 281         }
 282
 283         // FIXME Move this to a separate routine that can
 284         // be called from elsewhere.
 285         //
 286         // OK, we've got some names. Let's format them.
 287         // Try to split the author list on " and "
 288         vector<docstring> const authors =
 289                 getVectorFromString(author, from_ascii(" and "));
 290
 291         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 292                 docstring shortauthor = familyName(authors[0])
 293                         + "/" + familyName(authors[1]);
 294                 if (authors.size() == 3)
 295                         shortauthor += "/" + familyName(authors[2]);
 296                 return convertLaTeXCommands(shortauthor);
 297         }
 298
 299         docstring retval = familyName(authors[0]);
 300
 301         if (authors.size() == 2 && authors[1] != "others") {
 302                 docstring const dformat = buf ?
 303                         buf->B_("%1$s and %2$s") : from_ascii("%1$s and %2$s");
 304                 retval = bformat(dformat, familyName(authors[0]), familyName(authors[1]));
 305         } else if (authors.size() >= 2) {
 306                 // we get here either if the author list is longer than two names
 307                 // or if the second 'name' is "others". we do the same thing either
 308                 // way.
 309                 docstring const dformat = buf ?
 310                         buf->B_("%1$s et al.") : from_ascii("%1$s et al.");
 311                 retval = bformat(dformat, familyName(authors[0]));
 312         }
 313
 314         return convertLaTeXCommands(retval);
 315 }
 316
 317
 318 docstring const BibTeXInfo::getYear() const
 319 {
 320         if (is_bibtex_) {
 321                 // first try legacy year field
 322                 docstring year = operator[]("year");
 323                 if (!year.empty())
 324                         return year;
 325                 // now try biblatex's date field
 326                 year = operator[]("date");
 327                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 328                 // We only want the years.
 329                 static regex const yreg("[-]?([\\d]{4}).*");
 330                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 331                 smatch sm;
 332                 string const date = to_utf8(year);
 333                 regex_match(date, sm, yreg);
 334                 year = from_ascii(sm[1]);
 335                 // check for an endyear
 336                 if (regex_match(date, sm, ereg))
 337                         year += char_type(0x2013) + from_ascii(sm[1]);
 338                 return year;
 339         }
 340
 341         docstring const opt = label();
 342         if (opt.empty())
 343                 return docstring();
 344
 345         docstring authors;
 346         docstring tmp = split(opt, authors, '(');
 347         if (tmp.empty())
 348                 // we don't have author (year)
 349                 return docstring();
 350         docstring year;
 351         tmp = split(tmp, year, ')');
 352         return year;
 353 }
 354
 355
 356 namespace {
 357
 358 docstring parseOptions(docstring const & format, string & optkey,
 359                     docstring & ifpart, docstring & elsepart);
 360
 361 // Calls parseOptions to deal with an embedded option, such as:
 362 //   {%number%[[, no.~%number%]]}
 363 // which must appear at the start of format. ifelsepart gets the
 364 // whole of the option, and we return what's left after the option.
 365 // we return format if there is an error.
 366 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 367 {
 368         LASSERT(format[0] == '{' && format[1] == '%', return format);
 369         string optkey;
 370         docstring ifpart;
 371         docstring elsepart;
 372         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 373         if (format == rest) { // parse error
 374                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 375                 return format;
 376         }
 377         LASSERT(rest.size() <= format.size(),
 378                 { ifelsepart = docstring(); return format; });
 379         ifelsepart = format.substr(0, format.size() - rest.size());
 380         return rest;
 381 }
 382
 383
 384 // Gets a "clause" from a format string, where the clause is
 385 // delimited by '[[' and ']]'. Returns what is left after the
 386 // clause is removed, and returns format if there is an error.
 387 docstring getClause(docstring const & format, docstring & clause)
 388 {
 389         docstring fmt = format;
 390         // remove '[['
 391         fmt = fmt.substr(2);
 392         // we'll remove characters from the front of fmt as we
 393         // deal with them
 394         while (!fmt.empty()) {
 395                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 396                         // that's the end
 397                         fmt = fmt.substr(2);
 398                         break;
 399                 }
 400                 // check for an embedded option
 401                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 402                         docstring part;
 403                         docstring const rest = parseEmbeddedOption(fmt, part);
 404                         if (fmt == rest) {
 405                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 406                                 return format;
 407                         }
 408                         clause += part;
 409                         fmt = rest;
 410                 } else { // it's just a normal character
 411                                 clause += fmt[0];
 412                                 fmt = fmt.substr(1);
 413                 }
 414         }
 415         return fmt;
 416 }
 417
 418
 419 // parse an options string, which must appear at the start of the
 420 // format parameter. puts the parsed bits in optkey, ifpart, and
 421 // elsepart and returns what's left after the option is removed.
 422 // if there's an error, it returns format itself.
 423 docstring parseOptions(docstring const & format, string & optkey,
 424                     docstring & ifpart, docstring & elsepart)
 425 {
 426         LASSERT(format[0] == '{' && format[1] == '%', return format);
 427         // strip '{%'
 428         docstring fmt = format.substr(2);
 429         size_t pos = fmt.find('%'); // end of key
 430         if (pos == string::npos) {
 431                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 432                 return format;
 433         }
 434         optkey = to_utf8(fmt.substr(0, pos));
 435         fmt = fmt.substr(pos + 1);
 436         // [[format]] should be next
 437         if (fmt[0] != '[' || fmt[1] != '[') {
 438                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 439                 return format;
 440         }
 441
 442         docstring curfmt = fmt;
 443         fmt = getClause(curfmt, ifpart);
 444         if (fmt == curfmt) {
 445                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 446                 return format;
 447         }
 448
 449         if (fmt[0] == '}') // we're done, no else clause
 450                 return fmt.substr(1);
 451
 452         // else part should follow
 453         if (fmt[0] != '[' || fmt[1] != '[') {
 454                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 455                 return format;
 456         }
 457
 458         curfmt = fmt;
 459         fmt = getClause(curfmt, elsepart);
 460         // we should be done
 461         if (fmt == curfmt || fmt[0] != '}') {
 462                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 463                 return format;
 464         }
 465         return fmt.substr(1);
 466 }
 467
 468
 469 } // anon namespace
 470
 471 /* FIXME
 472 Bug #9131 revealed an oddity in how we are generating citation information
 473 when more than one key is given. We end up building a longer and longer format
 474 string as we go, which we then have to re-parse, over and over and over again,
 475 rather than generating the information for the individual keys and then putting
 476 all of that together. We do that to deal with the way separators work, from what
 477 I can tell, but it still feels like a hack. Fixing this would require quite a
 478 bit of work, however.
 479 */
 480 docstring BibTeXInfo::expandFormat(docstring const & format,
 481                 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
 482                 CiteItem const & ci, bool next, bool second) const
 483 {
 484         // incorrect use of macros could put us in an infinite loop
 485         static int const max_passes = 5000;
 486         // the use of overly large keys can lead to performance problems, due
 487         // to eventual attempts to convert LaTeX macros to unicode. See bug
 488         // #8944. This is perhaps not the best solution, but it will have to
 489         // do for now.
 490         static size_t const max_keysize = 128;
 491         odocstringstream ret; // return value
 492         string key;
 493         bool scanning_key = false;
 494         bool scanning_rich = false;
 495
 496         CiteEngineType const engine_type = buf.params().citeEngineType();
 497         docstring fmt = format;
 498         // we'll remove characters from the front of fmt as we
 499         // deal with them
 500         while (!fmt.empty()) {
 501                 if (counter > max_passes) {
 502                         LYXERR0("Recursion limit reached while parsing `"
 503                                 << format << "'.");
 504                         return _("ERROR!");
 505                 }
 506
 507                 char_type thischar = fmt[0];
 508                 if (thischar == '%') {
 509                         // beginning or end of key
 510                         if (scanning_key) {
 511                                 // end of key
 512                                 scanning_key = false;
 513                                 // so we replace the key with its value, which may be empty
 514                                 if (key[0] == '!') {
 515                                         // macro
 516                                         string const val =
 517                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 518                                         fmt = from_utf8(val) + fmt.substr(1);
 519                                         counter += 1;
 520                                         continue;
 521                                 } else if (key[0] == '_') {
 522                                         // a translatable bit
 523                                         string const val =
 524                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 525                                         docstring const trans =
 526                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 527                                         ret << trans;
 528                                 } else {
 529                                         docstring const val =
 530                                                 getValueForKey(key, buf, ci, xrefs, max_keysize);
 531                                         if (!scanning_rich)
 532                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 533                                         ret << val;
 534                                         if (!scanning_rich)
 535                                                 ret << from_ascii("{!</span>!}");
 536                                 }
 537                         } else {
 538                                 // beginning of key
 539                                 key.clear();
 540                                 scanning_key = true;
 541                         }
 542                 }
 543                 else if (thischar == '{') {
 544                         // beginning of option?
 545                         if (scanning_key) {
 546                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 547                                 return _("ERROR!");
 548                         }
 549                         if (fmt.size() > 1) {
 550                                 if (fmt[1] == '%') {
 551                                         // it is the beginning of an optional format
 552                                         string optkey;
 553                                         docstring ifpart;
 554                                         docstring elsepart;
 555                                         docstring const newfmt =
 556                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 557                                         if (newfmt == fmt) // parse error
 558                                                 return _("ERROR!");
 559                                         fmt = newfmt;
 560                                         docstring const val =
 561                                                 getValueForKey(optkey, buf, ci, xrefs);
 562                                         if (optkey == "next" && next)
 563                                                 ret << ifpart; // without expansion
 564                                         else if (!val.empty()) {
 565                                                 int newcounter = 0;
 566                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 567                                                         ci, next);
 568                                         } else if (!elsepart.empty()) {
 569                                                 int newcounter = 0;
 570                                                 ret << expandFormat(elsepart, xrefs, newcounter, buf,
 571                                                         ci, next);
 572                                         }
 573                                         // fmt will have been shortened for us already
 574                                         continue;
 575                                 }
 576                                 if (fmt[1] == '!') {
 577                                         // beginning of rich text
 578                                         scanning_rich = true;
 579                                         fmt = fmt.substr(2);
 580                                         ret << from_ascii("{!");
 581                                         continue;
 582                                 }
 583                         }
 584                         // we are here if '{' was not followed by % or !.
 585                         // So it's just a character.
 586                         ret << thischar;
 587                 }
 588                 else if (scanning_rich && thischar == '!'
 589                          && fmt.size() > 1 && fmt[1] == '}') {
 590                         // end of rich text
 591                         scanning_rich = false;
 592                         fmt = fmt.substr(2);
 593                         ret << from_ascii("!}");
 594                         continue;
 595                 }
 596                 else if (scanning_key)
 597                         key += char(thischar);
 598                 else {
 599                         try {
 600                                 ret.put(thischar);
 601                         } catch (EncodingException & /* e */) {
 602                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 603                         }
 604                 }
 605                 fmt = fmt.substr(1);
 606         } // for loop
 607         if (scanning_key) {
 608                 LYXERR0("Never found end of key in `" << format << "'!");
 609                 return _("ERROR!");
 610         }
 611         if (scanning_rich) {
 612                 LYXERR0("Never found end of rich text in `" << format << "'!");
 613                 return _("ERROR!");
 614         }
 615         return ret.str();
 616 }
 617
 618
 619 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 620         Buffer const & buf, CiteItem const & ci) const
 621 {
 622         bool const richtext = ci.richtext;
 623
 624         if (!richtext && !info_.empty())
 625                 return info_;
 626         if (richtext && !info_richtext_.empty())
 627                 return info_richtext_;
 628
 629         if (!is_bibtex_) {
 630                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 631                 info_ = it->second;
 632                 return info_;
 633         }
 634
 635         CiteEngineType const engine_type = buf.params().citeEngineType();
 636         DocumentClass const & dc = buf.params().documentClass();
 637         docstring const & format =
 638                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 639         int counter = 0;
 640         info_ = expandFormat(format, xrefs, counter, buf,
 641                 ci, false, false);
 642
 643         if (info_.empty()) {
 644                 // this probably shouldn't happen
 645                 return info_;
 646         }
 647
 648         if (richtext) {
 649                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 650                 return info_richtext_;
 651         }
 652
 653         info_ = convertLaTeXCommands(processRichtext(info_, false));
 654         return info_;
 655 }
 656
 657
 658 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 659         Buffer const & buf, docstring const & format,
 660         CiteItem const & ci, bool next, bool second) const
 661 {
 662         docstring loclabel;
 663
 664         int counter = 0;
 665         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 666
 667         if (!loclabel.empty() && !next) {
 668                 loclabel = processRichtext(loclabel, ci.richtext);
 669                 loclabel = convertLaTeXCommands(loclabel);
 670         }
 671
 672         return loclabel;
 673 }
 674
 675
 676 docstring const & BibTeXInfo::operator[](docstring const & field) const
 677 {
 678         BibTeXInfo::const_iterator it = find(field);
 679         if (it != end())
 680                 return it->second;
 681         static docstring const empty_value = docstring();
 682         return empty_value;
 683 }
 684
 685
 686 docstring const & BibTeXInfo::operator[](string const & field) const
 687 {
 688         return operator[](from_ascii(field));
 689 }
 690
 691
 692 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 693         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 694 {
 695         // anything less is pointless
 696         LASSERT(maxsize >= 16, maxsize = 16);
 697         string key = oldkey;
 698         bool cleanit = false;
 699         if (prefixIs(oldkey, "clean:")) {
 700                 key = oldkey.substr(6);
 701                 cleanit = true;
 702         }
 703
 704         docstring ret = operator[](key);
 705         if (ret.empty() && !xrefs.empty()) {
 706                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 707                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 708                 for (; it != en; ++it) {
 709                         if (*it && !(**it)[key].empty()) {
 710                                 ret = (**it)[key];
 711                                 break;
 712                         }
 713                 }
 714         }
 715         if (ret.empty()) {
 716                 // some special keys
 717                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 718                 if (key == "dialog" && ci.context == CiteItem::Dialog)
 719                         ret = from_ascii("x"); // any non-empty string will do
 720                 else if (key == "entrytype")
 721                         ret = entry_type_;
 722                 else if (key == "key")
 723                         ret = bib_key_;
 724                 else if (key == "label")
 725                         ret = label_;
 726                 else if (key == "modifier" && modifier_ != 0)
 727                         ret = modifier_;
 728                 else if (key == "numericallabel")
 729                         ret = cite_number_;
 730                 else if (key == "abbrvauthor")
 731                         // Special key to provide abbreviated author names.
 732                         ret = getAbbreviatedAuthor(&buf, false);
 733                 else if (key == "shortauthor")
 734                         // When shortauthor is not defined, jurabib automatically
 735                         // provides jurabib-style abbreviated author names. We do
 736                         // this as well.
 737                         ret = getAbbreviatedAuthor(&buf, true);
 738                 else if (key == "shorttitle") {
 739                         // When shorttitle is not defined, jurabib uses for `article'
 740                         // and `periodical' entries the form `journal volume [year]'
 741                         // and for other types of entries it uses the `title' field.
 742                         if (entry_type_ == "article" || entry_type_ == "periodical")
 743                                 ret = operator[]("journal") + " " + operator[]("volume")
 744                                         + " [" + operator[]("year") + "]";
 745                         else
 746                                 ret = operator[]("title");
 747                 } else if (key == "bibentry") {
 748                         // Special key to provide the full bibliography entry: see getInfo()
 749                         CiteEngineType const engine_type = buf.params().citeEngineType();
 750                         DocumentClass const & dc = buf.params().documentClass();
 751                         docstring const & format =
 752                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 753                         int counter = 0;
 754                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
 755                 } else if (key == "textbefore")
 756                         ret = ci.textBefore;
 757                 else if (key == "textafter")
 758                         ret = ci.textAfter;
 759                 else if (key == "year")
 760                         ret = getYear();
 761         }
 762
 763         if (cleanit)
 764                 ret = html::cleanAttr(ret);
 765
 766         // make sure it is not too big
 767         support::truncateWithEllipsis(ret, maxsize);
 768         return ret;
 769 }
 770
 771
 772 //////////////////////////////////////////////////////////////////////
 773 //
 774 // BiblioInfo
 775 //
 776 //////////////////////////////////////////////////////////////////////
 777
 778 namespace {
 779
 780 // A functor for use with sort, leading to case insensitive sorting
 781 class compareNoCase: public binary_function<docstring, docstring, bool>
 782 {
 783 public:
 784         bool operator()(docstring const & s1, docstring const & s2) const {
 785                 return compare_no_case(s1, s2) < 0;
 786         }
 787 };
 788
 789 } // namespace anon
 790
 791
 792 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
 793 {
 794         vector<docstring> result;
 795         if (!data.isBibTeX())
 796                 return result;
 797         // Legacy crossref field. This is not nestable.
 798         if (!nested && !data["crossref"].empty()) {
 799                 docstring const xrefkey = data["crossref"];
 800                 result.push_back(xrefkey);
 801                 // However, check for nested xdatas
 802                 BiblioInfo::const_iterator it = find(xrefkey);
 803                 if (it != end()) {
 804                         BibTeXInfo const & xref = it->second;
 805                         vector<docstring> const nxdata = getXRefs(xref, true);
 806                         if (!nxdata.empty())
 807                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
 808                 }
 809         }
 810         // Biblatex's xdata field. Infinitely nestable.
 811         // XData field can consist of a comma-separated list of keys
 812         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
 813         if (!xdatakeys.empty()) {
 814                 vector<docstring>::const_iterator xit = xdatakeys.begin();
 815                 vector<docstring>::const_iterator xen = xdatakeys.end();
 816                 for (; xit != xen; ++xit) {
 817                         docstring const xdatakey = *xit;
 818                         result.push_back(xdatakey);
 819                         BiblioInfo::const_iterator it = find(xdatakey);
 820                         if (it != end()) {
 821                                 BibTeXInfo const & xdata = it->second;
 822                                 vector<docstring> const nxdata = getXRefs(xdata, true);
 823                                 if (!nxdata.empty())
 824                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
 825                         }
 826                 }
 827         }
 828         return result;
 829 }
 830
 831
 832 vector<docstring> const BiblioInfo::getKeys() const
 833 {
 834         vector<docstring> bibkeys;
 835         BiblioInfo::const_iterator it  = begin();
 836         for (; it != end(); ++it)
 837                 bibkeys.push_back(it->first);
 838         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 839         return bibkeys;
 840 }
 841
 842
 843 vector<docstring> const BiblioInfo::getFields() const
 844 {
 845         vector<docstring> bibfields;
 846         set<docstring>::const_iterator it = field_names_.begin();
 847         set<docstring>::const_iterator end = field_names_.end();
 848         for (; it != end; ++it)
 849                 bibfields.push_back(*it);
 850         sort(bibfields.begin(), bibfields.end());
 851         return bibfields;
 852 }
 853
 854
 855 vector<docstring> const BiblioInfo::getEntries() const
 856 {
 857         vector<docstring> bibentries;
 858         set<docstring>::const_iterator it = entry_types_.begin();
 859         set<docstring>::const_iterator end = entry_types_.end();
 860         for (; it != end; ++it)
 861                 bibentries.push_back(*it);
 862         sort(bibentries.begin(), bibentries.end());
 863         return bibentries;
 864 }
 865
 866
 867 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 868 {
 869         BiblioInfo::const_iterator it = find(key);
 870         if (it == end())
 871                 return docstring();
 872         BibTeXInfo const & data = it->second;
 873         return data.getAbbreviatedAuthor(&buf, false);
 874 }
 875
 876
 877 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 878 {
 879         BiblioInfo::const_iterator it = find(key);
 880         if (it == end())
 881                 return docstring();
 882         BibTeXInfo const & data = it->second;
 883         return data.citeNumber();
 884 }
 885
 886
 887 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 888 {
 889         BiblioInfo::const_iterator it = find(key);
 890         if (it == end())
 891                 return docstring();
 892         BibTeXInfo const & data = it->second;
 893         docstring year = data.getYear();
 894         if (year.empty()) {
 895                 // let's try the crossrefs
 896                 vector<docstring> const xrefs = getXRefs(data);
 897                 if (xrefs.empty())
 898                         // no luck
 899                         return docstring();
 900                 vector<docstring>::const_iterator it = xrefs.begin();
 901                 vector<docstring>::const_iterator en = xrefs.end();
 902                 for (; it != en; ++it) {
 903                         BiblioInfo::const_iterator const xrefit = find(*it);
 904                         if (xrefit == end())
 905                                 continue;
 906                         BibTeXInfo const & xref_data = xrefit->second;
 907                         year = xref_data.getYear();
 908                         if (!year.empty())
 909                                 // success!
 910                                 break;
 911                 }
 912         }
 913         if (use_modifier && data.modifier() != 0)
 914                 year += data.modifier();
 915         return year;
 916 }
 917
 918
 919 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 920 {
 921         docstring const year = getYear(key, use_modifier);
 922         if (year.empty())
 923                 return buf.B_("No year");
 924         return year;
 925 }
 926
 927
 928 docstring const BiblioInfo::getInfo(docstring const & key,
 929         Buffer const & buf, CiteItem const & ci) const
 930 {
 931         BiblioInfo::const_iterator it = find(key);
 932         if (it == end())
 933                 return docstring(_("Bibliography entry not found!"));
 934         BibTeXInfo const & data = it->second;
 935         BibTeXInfoList xrefptrs;
 936         vector<docstring> const xrefs = getXRefs(data);
 937         if (!xrefs.empty()) {
 938                 vector<docstring>::const_iterator it = xrefs.begin();
 939                 vector<docstring>::const_iterator en = xrefs.end();
 940                 for (; it != en; ++it) {
 941                         BiblioInfo::const_iterator const xrefit = find(*it);
 942                         if (xrefit != end())
 943                                 xrefptrs.push_back(&(xrefit->second));
 944                 }
 945         }
 946         return data.getInfo(xrefptrs, buf, ci);
 947 }
 948
 949
 950 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 951         Buffer const & buf, string const & style, CiteItem const & ci) const
 952 {
 953         size_t max_size = ci.max_size;
 954         // shorter makes no sense
 955         LASSERT(max_size >= 16, max_size = 16);
 956
 957         // we can't display more than 10 of these, anyway
 958         bool const too_many_keys = keys.size() > 10;
 959         if (too_many_keys)
 960                 keys.resize(10);
 961
 962         CiteEngineType const engine_type = buf.params().citeEngineType();
 963         DocumentClass const & dc = buf.params().documentClass();
 964         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 965         docstring ret = format;
 966         vector<docstring>::const_iterator key = keys.begin();
 967         vector<docstring>::const_iterator ken = keys.end();
 968         for (; key != ken; ++key) {
 969                 BiblioInfo::const_iterator it = find(*key);
 970                 BibTeXInfo empty_data;
 971                 empty_data.key(*key);
 972                 BibTeXInfo & data = empty_data;
 973                 vector<BibTeXInfo const *> xrefptrs;
 974                 if (it != end()) {
 975                         data = it->second;
 976                         vector<docstring> const xrefs = getXRefs(data);
 977                         if (!xrefs.empty()) {
 978                                 vector<docstring>::const_iterator it = xrefs.begin();
 979                                 vector<docstring>::const_iterator en = xrefs.end();
 980                                 for (; it != en; ++it) {
 981                                         BiblioInfo::const_iterator const xrefit = find(*it);
 982                                         if (xrefit != end())
 983                                                 xrefptrs.push_back(&(xrefit->second));
 984                                 }
 985                         }
 986                 }
 987                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
 988         }
 989
 990         if (too_many_keys)
 991                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
 992         support::truncateWithEllipsis(ret, max_size);
 993         return ret;
 994 }
 995
 996
 997 bool BiblioInfo::isBibtex(docstring const & key) const
 998 {
 999         docstring key1;
1000         split(key, key1, ',');
1001         BiblioInfo::const_iterator it = find(key1);
1002         if (it == end())
1003                 return false;
1004         return it->second.isBibTeX();
1005 }
1006
1007
1008 vector<docstring> const BiblioInfo::getCiteStrings(
1009         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1010         Buffer const & buf, CiteItem const & ci) const
1011 {
1012         if (empty())
1013                 return vector<docstring>();
1014
1015         string style;
1016         vector<docstring> vec(styles.size());
1017         for (size_t i = 0; i != vec.size(); ++i) {
1018                 style = styles[i].name;
1019                 vec[i] = getLabel(keys, buf, style, ci);
1020         }
1021
1022         return vec;
1023 }
1024
1025
1026 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1027 {
1028         bimap_.insert(info.begin(), info.end());
1029         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1030         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1031 }
1032
1033
1034 namespace {
1035
1036 // used in xhtml to sort a list of BibTeXInfo objects
1037 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1038 {
1039         docstring const lauth = lhs->getAbbreviatedAuthor();
1040         docstring const rauth = rhs->getAbbreviatedAuthor();
1041         docstring const lyear = lhs->getYear();
1042         docstring const ryear = rhs->getYear();
1043         docstring const ltitl = lhs->operator[]("title");
1044         docstring const rtitl = rhs->operator[]("title");
1045         return  (lauth < rauth)
1046                 || (lauth == rauth && lyear < ryear)
1047                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1048 }
1049
1050 }
1051
1052
1053 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1054 {
1055         cited_entries_.clear();
1056         // We are going to collect all the citation keys used in the document,
1057         // getting them from the TOC.
1058         // FIXME We may want to collect these differently, in the first case,
1059         // so that we might have them in order of appearance.
1060         set<docstring> citekeys;
1061         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1062         Toc::const_iterator it = toc->begin();
1063         Toc::const_iterator const en = toc->end();
1064         for (; it != en; ++it) {
1065                 if (it->str().empty())
1066                         continue;
1067                 vector<docstring> const keys = getVectorFromString(it->str());
1068                 citekeys.insert(keys.begin(), keys.end());
1069         }
1070         if (citekeys.empty())
1071                 return;
1072
1073         // We have a set of the keys used in this document.
1074         // We will now convert it to a list of the BibTeXInfo objects used in
1075         // this document...
1076         vector<BibTeXInfo const *> bi;
1077         set<docstring>::const_iterator cit = citekeys.begin();
1078         set<docstring>::const_iterator const cen = citekeys.end();
1079         for (; cit != cen; ++cit) {
1080                 BiblioInfo::const_iterator const bt = find(*cit);
1081                 if (bt == end() || !bt->second.isBibTeX())
1082                         continue;
1083                 bi.push_back(&(bt->second));
1084         }
1085         // ...and sort it.
1086         sort(bi.begin(), bi.end(), lSorter);
1087
1088         // Now we can write the sorted keys
1089         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1090         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1091         for (; bit != ben; ++bit)
1092                 cited_entries_.push_back((*bit)->key());
1093 }
1094
1095
1096 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1097 {
1098         collectCitedEntries(buf);
1099         CiteEngineType const engine_type = buf.params().citeEngineType();
1100         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1101
1102         int keynumber = 0;
1103         char modifier = 0;
1104         // used to remember the last one we saw
1105         // we'll be comparing entries to see if we need to add
1106         // modifiers, like "1984a"
1107         map<docstring, BibTeXInfo>::iterator last;
1108
1109         vector<docstring>::const_iterator it = cited_entries_.begin();
1110         vector<docstring>::const_iterator const en = cited_entries_.end();
1111         for (; it != en; ++it) {
1112                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1113                 // this shouldn't happen, but...
1114                 if (biit == bimap_.end())
1115                         // ...fail gracefully, anyway.
1116                         continue;
1117                 BibTeXInfo & entry = biit->second;
1118                 if (numbers) {
1119                         docstring const num = convert<docstring>(++keynumber);
1120                         entry.setCiteNumber(num);
1121                 } else {
1122                         // coverity complains about our derefercing the iterator last,
1123                         // which was not initialized above. but it does get initialized
1124                         // after the first time through the loop, which is the point of
1125                         // the first test.
1126                         // coverity[FORWARD_NULL]
1127                         if (it != cited_entries_.begin()
1128                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1129                             // we access the year via getYear() so as to get it from the xref,
1130                             // if we need to do so
1131                             && getYear(entry.key()) == getYear(last->second.key())) {
1132                                 if (modifier == 0) {
1133                                         // so the last one should have been 'a'
1134                                         last->second.setModifier('a');
1135                                         modifier = 'b';
1136                                 } else if (modifier == 'z')
1137                                         modifier = 'A';
1138                                 else
1139                                         modifier++;
1140                         } else {
1141                                 modifier = 0;
1142                         }
1143                         entry.setModifier(modifier);
1144                         // remember the last one
1145                         last = biit;
1146                 }
1147         }
1148         // Set the labels
1149         it = cited_entries_.begin();
1150         for (; it != en; ++it) {
1151                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1152                 // this shouldn't happen, but...
1153                 if (biit == bimap_.end())
1154                         // ...fail gracefully, anyway.
1155                         continue;
1156                 BibTeXInfo & entry = biit->second;
1157                 if (numbers) {
1158                         entry.label(entry.citeNumber());
1159                 } else {
1160                         docstring const auth = entry.getAbbreviatedAuthor(&buf, false);
1161                         // we do it this way so as to access the xref, if necessary
1162                         // note that this also gives us the modifier
1163                         docstring const year = getYear(*it, buf, true);
1164                         if (!auth.empty() && !year.empty())
1165                                 entry.label(auth + ' ' + year);
1166                         else
1167                                 entry.label(entry.key());
1168                 }
1169         }
1170 }
1171
1172
1173 //////////////////////////////////////////////////////////////////////
1174 //
1175 // CitationStyle
1176 //
1177 //////////////////////////////////////////////////////////////////////
1178
1179
1180 CitationStyle citationStyleFromString(string const & command,
1181                                       BufferParams const & params)
1182 {
1183         CitationStyle cs;
1184         if (command.empty())
1185                 return cs;
1186
1187         string const alias = params.getCiteAlias(command);
1188         string cmd = alias.empty() ? command : alias;
1189         if (isUpperCase(command[0])) {
1190                 cs.forceUpperCase = true;
1191                 cmd[0] = lowercase(cmd[0]);
1192         }
1193
1194         size_t const n = command.size() - 1;
1195         if (command[n] == '*') {
1196                 cs.hasStarredVersion = true;
1197                 if (suffixIs(cmd, '*'))
1198                         cmd = cmd.substr(0, cmd.size() - 1);
1199         }
1200
1201         cs.name = cmd;
1202         return cs;
1203 }
1204
1205
1206 string citationStyleToString(const CitationStyle & cs, bool const latex)
1207 {
1208         string cmd = latex ? cs.cmd : cs.name;
1209         if (cs.forceUpperCase)
1210                 cmd[0] = uppercase(cmd[0]);
1211         if (cs.hasStarredVersion)
1212                 cmd += '*';
1213         return cmd;
1214 }
1215
1216 } // namespace lyx