src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Citation.h"
  21 #include "Encoding.h"
  22 #include "InsetIterator.h"
  23 #include "Language.h"
  24 #include "output_xhtml.h"
  25 #include "Paragraph.h"
  26 #include "TextClass.h"
  27 #include "TocBackend.h"
  28
  29 #include "support/convert.h"
  30 #include "support/debug.h"
  31 #include "support/docstream.h"
  32 #include "support/gettext.h"
  33 #include "support/lassert.h"
  34 #include "support/lstrings.h"
  35 #include "support/regex.h"
  36 #include "support/textutils.h"
  37
  38 #include <set>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43
  44 namespace lyx {
  45
  46 namespace {
  47
  48 // gets the "family name" from an author-type string
  49 docstring familyName(docstring const & name)
  50 {
  51         if (name.empty())
  52                 return docstring();
  53
  54         // first we look for a comma, and take the last name to be everything
  55         // preceding the right-most one, so that we also get the "jr" part.
  56         docstring::size_type idx = name.rfind(',');
  57         if (idx != docstring::npos)
  58                 return ltrim(name.substr(0, idx));
  59
  60         // OK, so now we want to look for the last name. We're going to
  61         // include the "von" part. This isn't perfect.
  62         // Split on spaces, to get various tokens.
  63         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  64         // If we only get two, assume the last one is the last name
  65         if (pieces.size() <= 2)
  66                 return pieces.back();
  67
  68         // Now we look for the first token that begins with a lower case letter.
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator en = pieces.end();
  71         for (; it != en; ++it) {
  72                 if ((*it).empty())
  73                         continue;
  74                 char_type const c = (*it)[0];
  75                 if (isLower(c))
  76                         break;
  77         }
  78
  79         if (it == en) // we never found a "von"
  80                 return pieces.back();
  81
  82         // reconstruct what we need to return
  83         docstring retval;
  84         bool first = true;
  85         for (; it != en; ++it) {
  86                 if (!first)
  87                         retval += " ";
  88                 else
  89                         first = false;
  90                 retval += *it;
  91         }
  92         return retval;
  93 }
  94
  95
  96 // converts a string containing LaTeX commands into unicode
  97 // for display.
  98 docstring convertLaTeXCommands(docstring const & str)
  99 {
 100         docstring val = str;
 101         docstring ret;
 102
 103         bool scanning_cmd = false;
 104         bool scanning_math = false;
 105         bool escaped = false; // used to catch \$, etc.
 106         while (!val.empty()) {
 107                 char_type const ch = val[0];
 108
 109                 // if we're scanning math, we output everything until we
 110                 // find an unescaped $, at which point we break out.
 111                 if (scanning_math) {
 112                         if (escaped)
 113                                 escaped = false;
 114                         else if (ch == '\\')
 115                                 escaped = true;
 116                         else if (ch == '$')
 117                                 scanning_math = false;
 118                         ret += ch;
 119                         val = val.substr(1);
 120                         continue;
 121                 }
 122
 123                 // if we're scanning a command name, then we just
 124                 // discard characters until we hit something that
 125                 // isn't alpha.
 126                 if (scanning_cmd) {
 127                         if (isAlphaASCII(ch)) {
 128                                 val = val.substr(1);
 129                                 escaped = false;
 130                                 continue;
 131                         }
 132                         // so we're done with this command.
 133                         // now we fall through and check this character.
 134                         scanning_cmd = false;
 135                 }
 136
 137                 // was the last character a \? If so, then this is something like:
 138                 // \\ or \$, so we'll just output it. That's probably not always right...
 139                 if (escaped) {
 140                         // exception: output \, as THIN SPACE
 141                         if (ch == ',')
 142                                 ret.push_back(0x2009);
 143                         else
 144                                 ret += ch;
 145                         val = val.substr(1);
 146                         escaped = false;
 147                         continue;
 148                 }
 149
 150                 if (ch == '$') {
 151                         ret += ch;
 152                         val = val.substr(1);
 153                         scanning_math = true;
 154                         continue;
 155                 }
 156
 157                 // we just ignore braces
 158                 if (ch == '{' || ch == '}') {
 159                         val = val.substr(1);
 160                         continue;
 161                 }
 162
 163                 // we're going to check things that look like commands, so if
 164                 // this doesn't, just output it.
 165                 if (ch != '\\') {
 166                         ret += ch;
 167                         val = val.substr(1);
 168                         continue;
 169                 }
 170
 171                 // ok, could be a command of some sort
 172                 // let's see if it corresponds to some unicode
 173                 // unicodesymbols has things in the form: \"{u},
 174                 // whereas we may see things like: \"u. So we'll
 175                 // look for that and change it, if necessary.
 176                 // FIXME: This is a sort of mini-tex2lyx.
 177                 //        Use the real tex2lyx instead!
 178                 static lyx::regex const reg("^\\\\\\W\\w");
 179                 if (lyx::regex_search(to_utf8(val), reg)) {
 180                         val.insert(3, from_ascii("}"));
 181                         val.insert(2, from_ascii("{"));
 182                 }
 183                 bool termination;
 184                 docstring rem;
 185                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 186                                 Encodings::TEXT_CMD, termination, rem);
 187                 if (!cnvtd.empty()) {
 188                         // it did, so we'll take that bit and proceed with what's left
 189                         ret += cnvtd;
 190                         val = rem;
 191                         continue;
 192                 }
 193                 // it's a command of some sort
 194                 scanning_cmd = true;
 195                 escaped = true;
 196                 val = val.substr(1);
 197         }
 198         return ret;
 199 }
 200
 201
 202 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 203 docstring processRichtext(docstring const & str, bool richtext)
 204 {
 205         docstring val = str;
 206         docstring ret;
 207
 208         bool scanning_rich = false;
 209         while (!val.empty()) {
 210                 char_type const ch = val[0];
 211                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 212                         // beginning of rich text
 213                         scanning_rich = true;
 214                         val = val.substr(2);
 215                         continue;
 216                 }
 217                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 218                         // end of rich text
 219                         scanning_rich = false;
 220                         val = val.substr(2);
 221                         continue;
 222                 }
 223                 if (richtext) {
 224                         if (scanning_rich)
 225                                 ret += ch;
 226                         else {
 227                                 // we need to escape '<' and '>'
 228                                 if (ch == '<')
 229                                         ret += "&lt;";
 230                                 else if (ch == '>')
 231                                         ret += "&gt;";
 232                                 else
 233                                         ret += ch;
 234                         }
 235                 } else if (!scanning_rich /* && !richtext */)
 236                         ret += ch;
 237                 // else the character is discarded, which will happen only if
 238                 // richtext == false and we are scanning rich text
 239                 val = val.substr(1);
 240         }
 241         return ret;
 242 }
 243
 244 } // anon namespace
 245
 246
 247 //////////////////////////////////////////////////////////////////////
 248 //
 249 // BibTeXInfo
 250 //
 251 //////////////////////////////////////////////////////////////////////
 252
 253 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 254         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 255           modifier_(0)
 256 {}
 257
 258
 259 docstring const BibTeXInfo::getAbbreviatedAuthor(
 260     Buffer const * buf, bool jurabib_style) const
 261 {
 262         if (!is_bibtex_) {
 263                 docstring const opt = label();
 264                 if (opt.empty())
 265                         return docstring();
 266
 267                 docstring authors;
 268                 docstring const remainder = trim(split(opt, authors, '('));
 269                 if (remainder.empty())
 270                         // in this case, we didn't find a "(",
 271                         // so we don't have author (year)
 272                         return docstring();
 273                 return authors;
 274         }
 275
 276         docstring author = operator[]("author");
 277         if (author.empty()) {
 278                 author = operator[]("editor");
 279                 if (author.empty())
 280                         return author;
 281         }
 282
 283         // FIXME Move this to a separate routine that can
 284         // be called from elsewhere.
 285         //
 286         // OK, we've got some names. Let's format them.
 287         // Try to split the author list on " and "
 288         vector<docstring> const authors =
 289                 getVectorFromString(author, from_ascii(" and "));
 290
 291         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 292                 docstring shortauthor = familyName(authors[0])
 293                         + "/" + familyName(authors[1]);
 294                 if (authors.size() == 3)
 295                         shortauthor += "/" + familyName(authors[2]);
 296                 return convertLaTeXCommands(shortauthor);
 297         }
 298
 299         docstring retval = familyName(authors[0]);
 300
 301         if (authors.size() == 2 && authors[1] != "others") {
 302                 docstring const dformat = buf ?
 303                         buf->B_("%1$s and %2$s") : from_ascii("%1$s and %2$s");
 304                 retval = bformat(dformat, familyName(authors[0]), familyName(authors[1]));
 305         } else if (authors.size() >= 2) {
 306                 // we get here either if the author list is longer than two names
 307                 // or if the second 'name' is "others". we do the same thing either
 308                 // way.
 309                 docstring const dformat = buf ?
 310                         buf->B_("%1$s et al.") : from_ascii("%1$s et al.");
 311                 retval = bformat(dformat, familyName(authors[0]));
 312         }
 313
 314         return convertLaTeXCommands(retval);
 315 }
 316
 317
 318 docstring const BibTeXInfo::getYear() const
 319 {
 320         if (is_bibtex_) {
 321                 // first try legacy year field
 322                 docstring year = operator[]("year");
 323                 if (!year.empty())
 324                         return year;
 325                 // now try biblatex's date field
 326                 year = operator[]("date");
 327                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 328                 // We only want the years.
 329                 static regex const yreg("[-]?([\\d]{4}).*");
 330                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 331                 smatch sm;
 332                 string const date = to_utf8(year);
 333                 regex_match(date, sm, yreg);
 334                 year = from_ascii(sm[1]);
 335                 // check for an endyear
 336                 if (regex_match(date, sm, ereg))
 337                         year += char_type(0x2013) + from_ascii(sm[1]);
 338                 return year;
 339         }
 340
 341         docstring const opt = label();
 342         if (opt.empty())
 343                 return docstring();
 344
 345         docstring authors;
 346         docstring tmp = split(opt, authors, '(');
 347         if (tmp.empty())
 348                 // we don't have author (year)
 349                 return docstring();
 350         docstring year;
 351         tmp = split(tmp, year, ')');
 352         return year;
 353 }
 354
 355
 356 namespace {
 357
 358 docstring parseOptions(docstring const & format, string & optkey,
 359                     docstring & ifpart, docstring & elsepart);
 360
 361 // Calls parseOptions to deal with an embedded option, such as:
 362 //   {%number%[[, no.~%number%]]}
 363 // which must appear at the start of format. ifelsepart gets the
 364 // whole of the option, and we return what's left after the option.
 365 // we return format if there is an error.
 366 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 367 {
 368         LASSERT(format[0] == '{' && format[1] == '%', return format);
 369         string optkey;
 370         docstring ifpart;
 371         docstring elsepart;
 372         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 373         if (format == rest) { // parse error
 374                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 375                 return format;
 376         }
 377         LASSERT(rest.size() <= format.size(),
 378                 { ifelsepart = docstring(); return format; });
 379         ifelsepart = format.substr(0, format.size() - rest.size());
 380         return rest;
 381 }
 382
 383
 384 // Gets a "clause" from a format string, where the clause is
 385 // delimited by '[[' and ']]'. Returns what is left after the
 386 // clause is removed, and returns format if there is an error.
 387 docstring getClause(docstring const & format, docstring & clause)
 388 {
 389         docstring fmt = format;
 390         // remove '[['
 391         fmt = fmt.substr(2);
 392         // we'll remove characters from the front of fmt as we
 393         // deal with them
 394         while (!fmt.empty()) {
 395                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 396                         // that's the end
 397                         fmt = fmt.substr(2);
 398                         break;
 399                 }
 400                 // check for an embedded option
 401                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 402                         docstring part;
 403                         docstring const rest = parseEmbeddedOption(fmt, part);
 404                         if (fmt == rest) {
 405                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 406                                 return format;
 407                         }
 408                         clause += part;
 409                         fmt = rest;
 410                 } else { // it's just a normal character
 411                                 clause += fmt[0];
 412                                 fmt = fmt.substr(1);
 413                 }
 414         }
 415         return fmt;
 416 }
 417
 418
 419 // parse an options string, which must appear at the start of the
 420 // format parameter. puts the parsed bits in optkey, ifpart, and
 421 // elsepart and returns what's left after the option is removed.
 422 // if there's an error, it returns format itself.
 423 docstring parseOptions(docstring const & format, string & optkey,
 424                     docstring & ifpart, docstring & elsepart)
 425 {
 426         LASSERT(format[0] == '{' && format[1] == '%', return format);
 427         // strip '{%'
 428         docstring fmt = format.substr(2);
 429         size_t pos = fmt.find('%'); // end of key
 430         if (pos == string::npos) {
 431                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 432                 return format;
 433         }
 434         optkey = to_utf8(fmt.substr(0, pos));
 435         fmt = fmt.substr(pos + 1);
 436         // [[format]] should be next
 437         if (fmt[0] != '[' || fmt[1] != '[') {
 438                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 439                 return format;
 440         }
 441
 442         docstring curfmt = fmt;
 443         fmt = getClause(curfmt, ifpart);
 444         if (fmt == curfmt) {
 445                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 446                 return format;
 447         }
 448
 449         if (fmt[0] == '}') // we're done, no else clause
 450                 return fmt.substr(1);
 451
 452         // else part should follow
 453         if (fmt[0] != '[' || fmt[1] != '[') {
 454                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 455                 return format;
 456         }
 457
 458         curfmt = fmt;
 459         fmt = getClause(curfmt, elsepart);
 460         // we should be done
 461         if (fmt == curfmt || fmt[0] != '}') {
 462                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 463                 return format;
 464         }
 465         return fmt.substr(1);
 466 }
 467
 468
 469 } // anon namespace
 470
 471 /* FIXME
 472 Bug #9131 revealed an oddity in how we are generating citation information
 473 when more than one key is given. We end up building a longer and longer format
 474 string as we go, which we then have to re-parse, over and over and over again,
 475 rather than generating the information for the individual keys and then putting
 476 all of that together. We do that to deal with the way separators work, from what
 477 I can tell, but it still feels like a hack. Fixing this would require quite a
 478 bit of work, however.
 479 */
 480 docstring BibTeXInfo::expandFormat(docstring const & format,
 481                 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
 482                 CiteItem const & ci, bool next, bool second) const
 483 {
 484         // incorrect use of macros could put us in an infinite loop
 485         static int const max_passes = 5000;
 486         // the use of overly large keys can lead to performance problems, due
 487         // to eventual attempts to convert LaTeX macros to unicode. See bug
 488         // #8944. This is perhaps not the best solution, but it will have to
 489         // do for now.
 490         static size_t const max_keysize = 128;
 491         odocstringstream ret; // return value
 492         string key;
 493         bool scanning_key = false;
 494         bool scanning_rich = false;
 495
 496         CiteEngineType const engine_type = buf.params().citeEngineType();
 497         docstring fmt = format;
 498         // we'll remove characters from the front of fmt as we
 499         // deal with them
 500         while (!fmt.empty()) {
 501                 if (counter > max_passes) {
 502                         LYXERR0("Recursion limit reached while parsing `"
 503                                 << format << "'.");
 504                         return _("ERROR!");
 505                 }
 506
 507                 char_type thischar = fmt[0];
 508                 if (thischar == '%') {
 509                         // beginning or end of key
 510                         if (scanning_key) {
 511                                 // end of key
 512                                 scanning_key = false;
 513                                 // so we replace the key with its value, which may be empty
 514                                 if (key[0] == '!') {
 515                                         // macro
 516                                         string const val =
 517                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 518                                         fmt = from_utf8(val) + fmt.substr(1);
 519                                         counter += 1;
 520                                         continue;
 521                                 } else if (key[0] == '_') {
 522                                         // a translatable bit
 523                                         string const val =
 524                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 525                                         docstring const trans =
 526                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 527                                         ret << trans;
 528                                 } else {
 529                                         docstring const val =
 530                                                 getValueForKey(key, buf, ci, xrefs, max_keysize);
 531                                         if (!scanning_rich)
 532                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 533                                         ret << val;
 534                                         if (!scanning_rich)
 535                                                 ret << from_ascii("{!</span>!}");
 536                                 }
 537                         } else {
 538                                 // beginning of key
 539                                 key.clear();
 540                                 scanning_key = true;
 541                         }
 542                 }
 543                 else if (thischar == '{') {
 544                         // beginning of option?
 545                         if (scanning_key) {
 546                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 547                                 return _("ERROR!");
 548                         }
 549                         if (fmt.size() > 1) {
 550                                 if (fmt[1] == '%') {
 551                                         // it is the beginning of an optional format
 552                                         string optkey;
 553                                         docstring ifpart;
 554                                         docstring elsepart;
 555                                         docstring const newfmt =
 556                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 557                                         if (newfmt == fmt) // parse error
 558                                                 return _("ERROR!");
 559                                         fmt = newfmt;
 560                                         docstring const val =
 561                                                 getValueForKey(optkey, buf, ci, xrefs);
 562                                         if (optkey == "next" && next)
 563                                                 ret << ifpart; // without expansion
 564                                         else if (!val.empty()) {
 565                                                 int newcounter = 0;
 566                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 567                                                         ci, next);
 568                                         } else if (!elsepart.empty()) {
 569                                                 int newcounter = 0;
 570                                                 ret << expandFormat(elsepart, xrefs, newcounter, buf,
 571                                                         ci, next);
 572                                         }
 573                                         // fmt will have been shortened for us already
 574                                         continue;
 575                                 }
 576                                 if (fmt[1] == '!') {
 577                                         // beginning of rich text
 578                                         scanning_rich = true;
 579                                         fmt = fmt.substr(2);
 580                                         ret << from_ascii("{!");
 581                                         continue;
 582                                 }
 583                         }
 584                         // we are here if '{' was not followed by % or !.
 585                         // So it's just a character.
 586                         ret << thischar;
 587                 }
 588                 else if (scanning_rich && thischar == '!'
 589                          && fmt.size() > 1 && fmt[1] == '}') {
 590                         // end of rich text
 591                         scanning_rich = false;
 592                         fmt = fmt.substr(2);
 593                         ret << from_ascii("!}");
 594                         continue;
 595                 }
 596                 else if (scanning_key)
 597                         key += char(thischar);
 598                 else {
 599                         try {
 600                                 ret.put(thischar);
 601                         } catch (EncodingException & /* e */) {
 602                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 603                         }
 604                 }
 605                 fmt = fmt.substr(1);
 606         } // for loop
 607         if (scanning_key) {
 608                 LYXERR0("Never found end of key in `" << format << "'!");
 609                 return _("ERROR!");
 610         }
 611         if (scanning_rich) {
 612                 LYXERR0("Never found end of rich text in `" << format << "'!");
 613                 return _("ERROR!");
 614         }
 615         return ret.str();
 616 }
 617
 618
 619 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 620         Buffer const & buf, CiteItem const & ci) const
 621 {
 622         bool const richtext = ci.richtext;
 623
 624         if (!richtext && !info_.empty())
 625                 return info_;
 626         if (richtext && !info_richtext_.empty())
 627                 return info_richtext_;
 628
 629         if (!is_bibtex_) {
 630                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 631                 info_ = it->second;
 632                 return info_;
 633         }
 634
 635         CiteEngineType const engine_type = buf.params().citeEngineType();
 636         DocumentClass const & dc = buf.params().documentClass();
 637         docstring const & format =
 638                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 639         int counter = 0;
 640         info_ = expandFormat(format, xrefs, counter, buf,
 641                 ci, false, false);
 642
 643         if (info_.empty()) {
 644                 // this probably shouldn't happen
 645                 return info_;
 646         }
 647
 648         if (richtext) {
 649                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 650                 return info_richtext_;
 651         }
 652
 653         info_ = convertLaTeXCommands(processRichtext(info_, false));
 654         return info_;
 655 }
 656
 657
 658 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 659         Buffer const & buf, docstring const & format,
 660         CiteItem const & ci, bool next, bool second) const
 661 {
 662         docstring loclabel;
 663
 664         int counter = 0;
 665         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 666
 667         if (!loclabel.empty() && !next) {
 668                 loclabel = processRichtext(loclabel, ci.richtext);
 669                 loclabel = convertLaTeXCommands(loclabel);
 670         }
 671
 672         return loclabel;
 673 }
 674
 675
 676 docstring const & BibTeXInfo::operator[](docstring const & field) const
 677 {
 678         BibTeXInfo::const_iterator it = find(field);
 679         if (it != end())
 680                 return it->second;
 681         static docstring const empty_value = docstring();
 682         return empty_value;
 683 }
 684
 685
 686 docstring const & BibTeXInfo::operator[](string const & field) const
 687 {
 688         return operator[](from_ascii(field));
 689 }
 690
 691
 692 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 693         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 694 {
 695         // anything less is pointless
 696         LASSERT(maxsize >= 16, maxsize = 16);
 697         string key = oldkey;
 698         bool cleanit = false;
 699         if (prefixIs(oldkey, "clean:")) {
 700                 key = oldkey.substr(6);
 701                 cleanit = true;
 702         }
 703
 704         docstring ret = operator[](key);
 705         if (ret.empty() && !xrefs.empty()) {
 706                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 707                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 708                 for (; it != en; ++it) {
 709                         if (*it && !(**it)[key].empty()) {
 710                                 ret = (**it)[key];
 711                                 break;
 712                         }
 713                 }
 714         }
 715         if (ret.empty()) {
 716                 // some special keys
 717                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 718                 if (key == "dialog" && ci.context == CiteItem::Dialog)
 719                         ret = from_ascii("x"); // any non-empty string will do
 720                 else if (key == "entrytype")
 721                         ret = entry_type_;
 722                 else if (key == "key")
 723                         ret = bib_key_;
 724                 else if (key == "label")
 725                         ret = label_;
 726                 else if (key == "modifier" && modifier_ != 0)
 727                         ret = modifier_;
 728                 else if (key == "numericallabel")
 729                         ret = cite_number_;
 730                 else if (key == "abbrvauthor")
 731                         // Special key to provide abbreviated author names.
 732                         ret = getAbbreviatedAuthor(&buf, false);
 733                 else if (key == "shortauthor")
 734                         // When shortauthor is not defined, jurabib automatically
 735                         // provides jurabib-style abbreviated author names. We do
 736                         // this as well.
 737                         ret = getAbbreviatedAuthor(&buf, true);
 738                 else if (key == "shorttitle") {
 739                         // When shorttitle is not defined, jurabib uses for `article'
 740                         // and `periodical' entries the form `journal volume [year]'
 741                         // and for other types of entries it uses the `title' field.
 742                         if (entry_type_ == "article" || entry_type_ == "periodical")
 743                                 ret = operator[]("journal") + " " + operator[]("volume")
 744                                         + " [" + operator[]("year") + "]";
 745                         else
 746                                 ret = operator[]("title");
 747                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 748                                 ret[0] = uppercase(ret[0]);
 749                 } else if (key == "bibentry") {
 750                         // Special key to provide the full bibliography entry: see getInfo()
 751                         CiteEngineType const engine_type = buf.params().citeEngineType();
 752                         DocumentClass const & dc = buf.params().documentClass();
 753                         docstring const & format =
 754                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 755                         int counter = 0;
 756                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
 757                 } else if (key == "textbefore")
 758                         ret = ci.textBefore;
 759                 else if (key == "textafter")
 760                         ret = ci.textAfter;
 761                 else if (key == "year")
 762                         ret = getYear();
 763         }
 764
 765         if (cleanit)
 766                 ret = html::cleanAttr(ret);
 767
 768         // make sure it is not too big
 769         support::truncateWithEllipsis(ret, maxsize);
 770         return ret;
 771 }
 772
 773
 774 //////////////////////////////////////////////////////////////////////
 775 //
 776 // BiblioInfo
 777 //
 778 //////////////////////////////////////////////////////////////////////
 779
 780 namespace {
 781
 782 // A functor for use with sort, leading to case insensitive sorting
 783 class compareNoCase: public binary_function<docstring, docstring, bool>
 784 {
 785 public:
 786         bool operator()(docstring const & s1, docstring const & s2) const {
 787                 return compare_no_case(s1, s2) < 0;
 788         }
 789 };
 790
 791 } // namespace anon
 792
 793
 794 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
 795 {
 796         vector<docstring> result;
 797         if (!data.isBibTeX())
 798                 return result;
 799         // Legacy crossref field. This is not nestable.
 800         if (!nested && !data["crossref"].empty()) {
 801                 docstring const xrefkey = data["crossref"];
 802                 result.push_back(xrefkey);
 803                 // However, check for nested xdatas
 804                 BiblioInfo::const_iterator it = find(xrefkey);
 805                 if (it != end()) {
 806                         BibTeXInfo const & xref = it->second;
 807                         vector<docstring> const nxdata = getXRefs(xref, true);
 808                         if (!nxdata.empty())
 809                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
 810                 }
 811         }
 812         // Biblatex's xdata field. Infinitely nestable.
 813         // XData field can consist of a comma-separated list of keys
 814         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
 815         if (!xdatakeys.empty()) {
 816                 vector<docstring>::const_iterator xit = xdatakeys.begin();
 817                 vector<docstring>::const_iterator xen = xdatakeys.end();
 818                 for (; xit != xen; ++xit) {
 819                         docstring const xdatakey = *xit;
 820                         result.push_back(xdatakey);
 821                         BiblioInfo::const_iterator it = find(xdatakey);
 822                         if (it != end()) {
 823                                 BibTeXInfo const & xdata = it->second;
 824                                 vector<docstring> const nxdata = getXRefs(xdata, true);
 825                                 if (!nxdata.empty())
 826                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
 827                         }
 828                 }
 829         }
 830         return result;
 831 }
 832
 833
 834 vector<docstring> const BiblioInfo::getKeys() const
 835 {
 836         vector<docstring> bibkeys;
 837         BiblioInfo::const_iterator it  = begin();
 838         for (; it != end(); ++it)
 839                 bibkeys.push_back(it->first);
 840         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 841         return bibkeys;
 842 }
 843
 844
 845 vector<docstring> const BiblioInfo::getFields() const
 846 {
 847         vector<docstring> bibfields;
 848         set<docstring>::const_iterator it = field_names_.begin();
 849         set<docstring>::const_iterator end = field_names_.end();
 850         for (; it != end; ++it)
 851                 bibfields.push_back(*it);
 852         sort(bibfields.begin(), bibfields.end());
 853         return bibfields;
 854 }
 855
 856
 857 vector<docstring> const BiblioInfo::getEntries() const
 858 {
 859         vector<docstring> bibentries;
 860         set<docstring>::const_iterator it = entry_types_.begin();
 861         set<docstring>::const_iterator end = entry_types_.end();
 862         for (; it != end; ++it)
 863                 bibentries.push_back(*it);
 864         sort(bibentries.begin(), bibentries.end());
 865         return bibentries;
 866 }
 867
 868
 869 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 870 {
 871         BiblioInfo::const_iterator it = find(key);
 872         if (it == end())
 873                 return docstring();
 874         BibTeXInfo const & data = it->second;
 875         return data.getAbbreviatedAuthor(&buf, false);
 876 }
 877
 878
 879 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 880 {
 881         BiblioInfo::const_iterator it = find(key);
 882         if (it == end())
 883                 return docstring();
 884         BibTeXInfo const & data = it->second;
 885         return data.citeNumber();
 886 }
 887
 888
 889 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 890 {
 891         BiblioInfo::const_iterator it = find(key);
 892         if (it == end())
 893                 return docstring();
 894         BibTeXInfo const & data = it->second;
 895         docstring year = data.getYear();
 896         if (year.empty()) {
 897                 // let's try the crossrefs
 898                 vector<docstring> const xrefs = getXRefs(data);
 899                 if (xrefs.empty())
 900                         // no luck
 901                         return docstring();
 902                 vector<docstring>::const_iterator it = xrefs.begin();
 903                 vector<docstring>::const_iterator en = xrefs.end();
 904                 for (; it != en; ++it) {
 905                         BiblioInfo::const_iterator const xrefit = find(*it);
 906                         if (xrefit == end())
 907                                 continue;
 908                         BibTeXInfo const & xref_data = xrefit->second;
 909                         year = xref_data.getYear();
 910                         if (!year.empty())
 911                                 // success!
 912                                 break;
 913                 }
 914         }
 915         if (use_modifier && data.modifier() != 0)
 916                 year += data.modifier();
 917         return year;
 918 }
 919
 920
 921 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 922 {
 923         docstring const year = getYear(key, use_modifier);
 924         if (year.empty())
 925                 return buf.B_("No year");
 926         return year;
 927 }
 928
 929
 930 docstring const BiblioInfo::getInfo(docstring const & key,
 931         Buffer const & buf, CiteItem const & ci) const
 932 {
 933         BiblioInfo::const_iterator it = find(key);
 934         if (it == end())
 935                 return docstring(_("Bibliography entry not found!"));
 936         BibTeXInfo const & data = it->second;
 937         BibTeXInfoList xrefptrs;
 938         vector<docstring> const xrefs = getXRefs(data);
 939         if (!xrefs.empty()) {
 940                 vector<docstring>::const_iterator it = xrefs.begin();
 941                 vector<docstring>::const_iterator en = xrefs.end();
 942                 for (; it != en; ++it) {
 943                         BiblioInfo::const_iterator const xrefit = find(*it);
 944                         if (xrefit != end())
 945                                 xrefptrs.push_back(&(xrefit->second));
 946                 }
 947         }
 948         return data.getInfo(xrefptrs, buf, ci);
 949 }
 950
 951
 952 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 953         Buffer const & buf, string const & style, CiteItem const & ci) const
 954 {
 955         size_t max_size = ci.max_size;
 956         // shorter makes no sense
 957         LASSERT(max_size >= 16, max_size = 16);
 958
 959         // we can't display more than 10 of these, anyway
 960         bool const too_many_keys = keys.size() > 10;
 961         if (too_many_keys)
 962                 keys.resize(10);
 963
 964         CiteEngineType const engine_type = buf.params().citeEngineType();
 965         DocumentClass const & dc = buf.params().documentClass();
 966         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 967         docstring ret = format;
 968         vector<docstring>::const_iterator key = keys.begin();
 969         vector<docstring>::const_iterator ken = keys.end();
 970         for (; key != ken; ++key) {
 971                 BiblioInfo::const_iterator it = find(*key);
 972                 BibTeXInfo empty_data;
 973                 empty_data.key(*key);
 974                 BibTeXInfo & data = empty_data;
 975                 vector<BibTeXInfo const *> xrefptrs;
 976                 if (it != end()) {
 977                         data = it->second;
 978                         vector<docstring> const xrefs = getXRefs(data);
 979                         if (!xrefs.empty()) {
 980                                 vector<docstring>::const_iterator it = xrefs.begin();
 981                                 vector<docstring>::const_iterator en = xrefs.end();
 982                                 for (; it != en; ++it) {
 983                                         BiblioInfo::const_iterator const xrefit = find(*it);
 984                                         if (xrefit != end())
 985                                                 xrefptrs.push_back(&(xrefit->second));
 986                                 }
 987                         }
 988                 }
 989                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
 990         }
 991
 992         if (too_many_keys)
 993                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
 994         support::truncateWithEllipsis(ret, max_size);
 995         return ret;
 996 }
 997
 998
 999 bool BiblioInfo::isBibtex(docstring const & key) const
1000 {
1001         docstring key1;
1002         split(key, key1, ',');
1003         BiblioInfo::const_iterator it = find(key1);
1004         if (it == end())
1005                 return false;
1006         return it->second.isBibTeX();
1007 }
1008
1009
1010 vector<docstring> const BiblioInfo::getCiteStrings(
1011         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1012         Buffer const & buf, CiteItem const & ci) const
1013 {
1014         if (empty())
1015                 return vector<docstring>();
1016
1017         string style;
1018         vector<docstring> vec(styles.size());
1019         for (size_t i = 0; i != vec.size(); ++i) {
1020                 style = styles[i].name;
1021                 vec[i] = getLabel(keys, buf, style, ci);
1022         }
1023
1024         return vec;
1025 }
1026
1027
1028 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1029 {
1030         bimap_.insert(info.begin(), info.end());
1031         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1032         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1033 }
1034
1035
1036 namespace {
1037
1038 // used in xhtml to sort a list of BibTeXInfo objects
1039 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1040 {
1041         docstring const lauth = lhs->getAbbreviatedAuthor();
1042         docstring const rauth = rhs->getAbbreviatedAuthor();
1043         docstring const lyear = lhs->getYear();
1044         docstring const ryear = rhs->getYear();
1045         docstring const ltitl = lhs->operator[]("title");
1046         docstring const rtitl = rhs->operator[]("title");
1047         return  (lauth < rauth)
1048                 || (lauth == rauth && lyear < ryear)
1049                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1050 }
1051
1052 }
1053
1054
1055 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1056 {
1057         cited_entries_.clear();
1058         // We are going to collect all the citation keys used in the document,
1059         // getting them from the TOC.
1060         // FIXME We may want to collect these differently, in the first case,
1061         // so that we might have them in order of appearance.
1062         set<docstring> citekeys;
1063         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1064         Toc::const_iterator it = toc->begin();
1065         Toc::const_iterator const en = toc->end();
1066         for (; it != en; ++it) {
1067                 if (it->str().empty())
1068                         continue;
1069                 vector<docstring> const keys = getVectorFromString(it->str());
1070                 citekeys.insert(keys.begin(), keys.end());
1071         }
1072         if (citekeys.empty())
1073                 return;
1074
1075         // We have a set of the keys used in this document.
1076         // We will now convert it to a list of the BibTeXInfo objects used in
1077         // this document...
1078         vector<BibTeXInfo const *> bi;
1079         set<docstring>::const_iterator cit = citekeys.begin();
1080         set<docstring>::const_iterator const cen = citekeys.end();
1081         for (; cit != cen; ++cit) {
1082                 BiblioInfo::const_iterator const bt = find(*cit);
1083                 if (bt == end() || !bt->second.isBibTeX())
1084                         continue;
1085                 bi.push_back(&(bt->second));
1086         }
1087         // ...and sort it.
1088         sort(bi.begin(), bi.end(), lSorter);
1089
1090         // Now we can write the sorted keys
1091         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1092         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1093         for (; bit != ben; ++bit)
1094                 cited_entries_.push_back((*bit)->key());
1095 }
1096
1097
1098 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1099 {
1100         collectCitedEntries(buf);
1101         CiteEngineType const engine_type = buf.params().citeEngineType();
1102         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1103
1104         int keynumber = 0;
1105         char modifier = 0;
1106         // used to remember the last one we saw
1107         // we'll be comparing entries to see if we need to add
1108         // modifiers, like "1984a"
1109         map<docstring, BibTeXInfo>::iterator last;
1110
1111         vector<docstring>::const_iterator it = cited_entries_.begin();
1112         vector<docstring>::const_iterator const en = cited_entries_.end();
1113         for (; it != en; ++it) {
1114                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1115                 // this shouldn't happen, but...
1116                 if (biit == bimap_.end())
1117                         // ...fail gracefully, anyway.
1118                         continue;
1119                 BibTeXInfo & entry = biit->second;
1120                 if (numbers) {
1121                         docstring const num = convert<docstring>(++keynumber);
1122                         entry.setCiteNumber(num);
1123                 } else {
1124                         // coverity complains about our derefercing the iterator last,
1125                         // which was not initialized above. but it does get initialized
1126                         // after the first time through the loop, which is the point of
1127                         // the first test.
1128                         // coverity[FORWARD_NULL]
1129                         if (it != cited_entries_.begin()
1130                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1131                             // we access the year via getYear() so as to get it from the xref,
1132                             // if we need to do so
1133                             && getYear(entry.key()) == getYear(last->second.key())) {
1134                                 if (modifier == 0) {
1135                                         // so the last one should have been 'a'
1136                                         last->second.setModifier('a');
1137                                         modifier = 'b';
1138                                 } else if (modifier == 'z')
1139                                         modifier = 'A';
1140                                 else
1141                                         modifier++;
1142                         } else {
1143                                 modifier = 0;
1144                         }
1145                         entry.setModifier(modifier);
1146                         // remember the last one
1147                         last = biit;
1148                 }
1149         }
1150         // Set the labels
1151         it = cited_entries_.begin();
1152         for (; it != en; ++it) {
1153                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1154                 // this shouldn't happen, but...
1155                 if (biit == bimap_.end())
1156                         // ...fail gracefully, anyway.
1157                         continue;
1158                 BibTeXInfo & entry = biit->second;
1159                 if (numbers) {
1160                         entry.label(entry.citeNumber());
1161                 } else {
1162                         docstring const auth = entry.getAbbreviatedAuthor(&buf, false);
1163                         // we do it this way so as to access the xref, if necessary
1164                         // note that this also gives us the modifier
1165                         docstring const year = getYear(*it, buf, true);
1166                         if (!auth.empty() && !year.empty())
1167                                 entry.label(auth + ' ' + year);
1168                         else
1169                                 entry.label(entry.key());
1170                 }
1171         }
1172 }
1173
1174
1175 //////////////////////////////////////////////////////////////////////
1176 //
1177 // CitationStyle
1178 //
1179 //////////////////////////////////////////////////////////////////////
1180
1181
1182 CitationStyle citationStyleFromString(string const & command,
1183                                       BufferParams const & params)
1184 {
1185         CitationStyle cs;
1186         if (command.empty())
1187                 return cs;
1188
1189         string const alias = params.getCiteAlias(command);
1190         string cmd = alias.empty() ? command : alias;
1191         if (isUpperCase(command[0])) {
1192                 cs.forceUpperCase = true;
1193                 cmd[0] = lowercase(cmd[0]);
1194         }
1195
1196         size_t const n = command.size() - 1;
1197         if (command[n] == '*') {
1198                 cs.hasStarredVersion = true;
1199                 if (suffixIs(cmd, '*'))
1200                         cmd = cmd.substr(0, cmd.size() - 1);
1201         }
1202
1203         cs.name = cmd;
1204         return cs;
1205 }
1206
1207
1208 string citationStyleToString(const CitationStyle & cs, bool const latex)
1209 {
1210         string cmd = latex ? cs.cmd : cs.name;
1211         if (cs.forceUpperCase)
1212                 cmd[0] = uppercase(cmd[0]);
1213         if (cs.hasStarredVersion)
1214                 cmd += '*';
1215         return cmd;
1216 }
1217
1218 } // namespace lyx