src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Citation.h"
  21 #include "Encoding.h"
  22 #include "InsetIterator.h"
  23 #include "Language.h"
  24 #include "output_xhtml.h"
  25 #include "Paragraph.h"
  26 #include "TextClass.h"
  27 #include "TocBackend.h"
  28
  29 #include "support/convert.h"
  30 #include "support/debug.h"
  31 #include "support/docstream.h"
  32 #include "support/gettext.h"
  33 #include "support/lassert.h"
  34 #include "support/lstrings.h"
  35 #include "support/regex.h"
  36 #include "support/textutils.h"
  37
  38 #include <set>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43
  44 namespace lyx {
  45
  46 namespace {
  47
  48 // gets the "family name" from an author-type string
  49 docstring familyName(docstring const & name)
  50 {
  51         if (name.empty())
  52                 return docstring();
  53
  54         // first we look for a comma, and take the last name to be everything
  55         // preceding the right-most one, so that we also get the "jr" part.
  56         docstring::size_type idx = name.rfind(',');
  57         if (idx != docstring::npos)
  58                 return ltrim(name.substr(0, idx));
  59
  60         // OK, so now we want to look for the last name. We're going to
  61         // include the "von" part. This isn't perfect.
  62         // Split on spaces, to get various tokens.
  63         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  64         // If we only get two, assume the last one is the last name
  65         if (pieces.size() <= 2)
  66                 return pieces.back();
  67
  68         // Now we look for the first token that begins with a lower case letter.
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator en = pieces.end();
  71         for (; it != en; ++it) {
  72                 if ((*it).empty())
  73                         continue;
  74                 char_type const c = (*it)[0];
  75                 if (isLower(c))
  76                         break;
  77         }
  78
  79         if (it == en) // we never found a "von"
  80                 return pieces.back();
  81
  82         // reconstruct what we need to return
  83         docstring retval;
  84         bool first = true;
  85         for (; it != en; ++it) {
  86                 if (!first)
  87                         retval += " ";
  88                 else
  89                         first = false;
  90                 retval += *it;
  91         }
  92         return retval;
  93 }
  94
  95
  96 // converts a string containing LaTeX commands into unicode
  97 // for display.
  98 docstring convertLaTeXCommands(docstring const & str)
  99 {
 100         docstring val = str;
 101         docstring ret;
 102
 103         bool scanning_cmd = false;
 104         bool scanning_math = false;
 105         bool escaped = false; // used to catch \$, etc.
 106         while (!val.empty()) {
 107                 char_type const ch = val[0];
 108
 109                 // if we're scanning math, we output everything until we
 110                 // find an unescaped $, at which point we break out.
 111                 if (scanning_math) {
 112                         if (escaped)
 113                                 escaped = false;
 114                         else if (ch == '\\')
 115                                 escaped = true;
 116                         else if (ch == '$')
 117                                 scanning_math = false;
 118                         ret += ch;
 119                         val = val.substr(1);
 120                         continue;
 121                 }
 122
 123                 // if we're scanning a command name, then we just
 124                 // discard characters until we hit something that
 125                 // isn't alpha.
 126                 if (scanning_cmd) {
 127                         if (isAlphaASCII(ch)) {
 128                                 val = val.substr(1);
 129                                 escaped = false;
 130                                 continue;
 131                         }
 132                         // so we're done with this command.
 133                         // now we fall through and check this character.
 134                         scanning_cmd = false;
 135                 }
 136
 137                 // was the last character a \? If so, then this is something like:
 138                 // \\ or \$, so we'll just output it. That's probably not always right...
 139                 if (escaped) {
 140                         // exception: output \, as THIN SPACE
 141                         if (ch == ',')
 142                                 ret.push_back(0x2009);
 143                         else
 144                                 ret += ch;
 145                         val = val.substr(1);
 146                         escaped = false;
 147                         continue;
 148                 }
 149
 150                 if (ch == '$') {
 151                         ret += ch;
 152                         val = val.substr(1);
 153                         scanning_math = true;
 154                         continue;
 155                 }
 156
 157                 // we just ignore braces
 158                 if (ch == '{' || ch == '}') {
 159                         val = val.substr(1);
 160                         continue;
 161                 }
 162
 163                 // we're going to check things that look like commands, so if
 164                 // this doesn't, just output it.
 165                 if (ch != '\\') {
 166                         ret += ch;
 167                         val = val.substr(1);
 168                         continue;
 169                 }
 170
 171                 // ok, could be a command of some sort
 172                 // let's see if it corresponds to some unicode
 173                 // unicodesymbols has things in the form: \"{u},
 174                 // whereas we may see things like: \"u. So we'll
 175                 // look for that and change it, if necessary.
 176                 // FIXME: This is a sort of mini-tex2lyx.
 177                 //        Use the real tex2lyx instead!
 178                 static lyx::regex const reg("^\\\\\\W\\w");
 179                 if (lyx::regex_search(to_utf8(val), reg)) {
 180                         val.insert(3, from_ascii("}"));
 181                         val.insert(2, from_ascii("{"));
 182                 }
 183                 bool termination;
 184                 docstring rem;
 185                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 186                                 Encodings::TEXT_CMD, termination, rem);
 187                 if (!cnvtd.empty()) {
 188                         // it did, so we'll take that bit and proceed with what's left
 189                         ret += cnvtd;
 190                         val = rem;
 191                         continue;
 192                 }
 193                 // it's a command of some sort
 194                 scanning_cmd = true;
 195                 escaped = true;
 196                 val = val.substr(1);
 197         }
 198         return ret;
 199 }
 200
 201
 202 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 203 docstring processRichtext(docstring const & str, bool richtext)
 204 {
 205         docstring val = str;
 206         docstring ret;
 207
 208         bool scanning_rich = false;
 209         while (!val.empty()) {
 210                 char_type const ch = val[0];
 211                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 212                         // beginning of rich text
 213                         scanning_rich = true;
 214                         val = val.substr(2);
 215                         continue;
 216                 }
 217                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 218                         // end of rich text
 219                         scanning_rich = false;
 220                         val = val.substr(2);
 221                         continue;
 222                 }
 223                 if (richtext) {
 224                         if (scanning_rich)
 225                                 ret += ch;
 226                         else {
 227                                 // we need to escape '<' and '>'
 228                                 if (ch == '<')
 229                                         ret += "&lt;";
 230                                 else if (ch == '>')
 231                                         ret += "&gt;";
 232                                 else
 233                                         ret += ch;
 234                         }
 235                 } else if (!scanning_rich /* && !richtext */)
 236                         ret += ch;
 237                 // else the character is discarded, which will happen only if
 238                 // richtext == false and we are scanning rich text
 239                 val = val.substr(1);
 240         }
 241         return ret;
 242 }
 243
 244 } // anon namespace
 245
 246
 247 //////////////////////////////////////////////////////////////////////
 248 //
 249 // BibTeXInfo
 250 //
 251 //////////////////////////////////////////////////////////////////////
 252
 253 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 254         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 255           modifier_(0)
 256 {}
 257
 258
 259 docstring const BibTeXInfo::getAuthorList(
 260     Buffer const * buf, bool full, bool forceshort) const
 261 {
 262         // Maxnames treshold depend on engine
 263         size_t maxnames = buf ?
 264                 buf->params().documentClass().max_citenames() : 2;
 265
 266         if (!is_bibtex_) {
 267                 docstring const opt = label();
 268                 if (opt.empty())
 269                         return docstring();
 270
 271                 docstring authors;
 272                 docstring const remainder = trim(split(opt, authors, '('));
 273                 if (remainder.empty())
 274                         // in this case, we didn't find a "(",
 275                         // so we don't have author (year)
 276                         return docstring();
 277                 return authors;
 278         }
 279
 280         docstring author = operator[]("author");
 281         if (author.empty()) {
 282                 author = operator[]("editor");
 283                 if (author.empty())
 284                         return author;
 285         }
 286
 287         // FIXME Move this to a separate routine that can
 288         // be called from elsewhere.
 289         //
 290         // OK, we've got some names. Let's format them.
 291         // Try to split the author list on " and "
 292         vector<docstring> const authors =
 293                 getVectorFromString(author, from_ascii(" and "));
 294
 295         docstring retval;
 296
 297         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 298                                                : ENGINE_TYPE_DEFAULT;
 299
 300         // These are defined in the styles
 301         string const etal =
 302                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
 303                     : " et al.";
 304         string const namesep =
 305                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
 306                    : ", ";
 307         string const lastnamesep =
 308                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
 309                     : ", and ";
 310         string const pairnamesep =
 311                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
 312                      : " and ";
 313
 314         // Shorten the list (with et al.) if forceshort is set
 315         // and the list can actually be shorten, else if maxcitenames
 316         // is passed and full is not set.
 317         bool shorten = forceshort && authors.size() > 1;
 318         vector<docstring>::const_iterator it = authors.begin();
 319         vector<docstring>::const_iterator en = authors.end();
 320         for (size_t i = 0; it != en; ++it, ++i) {
 321                 if (i >= maxnames && !full) {
 322                         shorten = true;
 323                         break;
 324                 }
 325                 if (*it == "others") {
 326                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 327                         break;
 328                 }
 329                 if (i > 0 && i == authors.size() - 1) {
 330                         if (authors.size() == 2)
 331                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 332                         else
 333                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 334                 } else if (i > 0)
 335                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 336                 retval += familyName(*it);
 337         }
 338         if (shorten)
 339                 retval = familyName(authors[0]) + (buf ? buf->B_(etal) : from_ascii(etal));
 340
 341         return convertLaTeXCommands(retval);
 342 }
 343
 344
 345 docstring const BibTeXInfo::getYear() const
 346 {
 347         if (is_bibtex_) {
 348                 // first try legacy year field
 349                 docstring year = operator[]("year");
 350                 if (!year.empty())
 351                         return year;
 352                 // now try biblatex's date field
 353                 year = operator[]("date");
 354                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 355                 // We only want the years.
 356                 static regex const yreg("[-]?([\\d]{4}).*");
 357                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 358                 smatch sm;
 359                 string const date = to_utf8(year);
 360                 regex_match(date, sm, yreg);
 361                 year = from_ascii(sm[1]);
 362                 // check for an endyear
 363                 if (regex_match(date, sm, ereg))
 364                         year += char_type(0x2013) + from_ascii(sm[1]);
 365                 return year;
 366         }
 367
 368         docstring const opt = label();
 369         if (opt.empty())
 370                 return docstring();
 371
 372         docstring authors;
 373         docstring tmp = split(opt, authors, '(');
 374         if (tmp.empty())
 375                 // we don't have author (year)
 376                 return docstring();
 377         docstring year;
 378         tmp = split(tmp, year, ')');
 379         return year;
 380 }
 381
 382
 383 namespace {
 384
 385 docstring parseOptions(docstring const & format, string & optkey,
 386                     docstring & ifpart, docstring & elsepart);
 387
 388 // Calls parseOptions to deal with an embedded option, such as:
 389 //   {%number%[[, no.~%number%]]}
 390 // which must appear at the start of format. ifelsepart gets the
 391 // whole of the option, and we return what's left after the option.
 392 // we return format if there is an error.
 393 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 394 {
 395         LASSERT(format[0] == '{' && format[1] == '%', return format);
 396         string optkey;
 397         docstring ifpart;
 398         docstring elsepart;
 399         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 400         if (format == rest) { // parse error
 401                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 402                 return format;
 403         }
 404         LASSERT(rest.size() <= format.size(),
 405                 { ifelsepart = docstring(); return format; });
 406         ifelsepart = format.substr(0, format.size() - rest.size());
 407         return rest;
 408 }
 409
 410
 411 // Gets a "clause" from a format string, where the clause is
 412 // delimited by '[[' and ']]'. Returns what is left after the
 413 // clause is removed, and returns format if there is an error.
 414 docstring getClause(docstring const & format, docstring & clause)
 415 {
 416         docstring fmt = format;
 417         // remove '[['
 418         fmt = fmt.substr(2);
 419         // we'll remove characters from the front of fmt as we
 420         // deal with them
 421         while (!fmt.empty()) {
 422                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 423                         // that's the end
 424                         fmt = fmt.substr(2);
 425                         break;
 426                 }
 427                 // check for an embedded option
 428                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 429                         docstring part;
 430                         docstring const rest = parseEmbeddedOption(fmt, part);
 431                         if (fmt == rest) {
 432                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 433                                 return format;
 434                         }
 435                         clause += part;
 436                         fmt = rest;
 437                 } else { // it's just a normal character
 438                                 clause += fmt[0];
 439                                 fmt = fmt.substr(1);
 440                 }
 441         }
 442         return fmt;
 443 }
 444
 445
 446 // parse an options string, which must appear at the start of the
 447 // format parameter. puts the parsed bits in optkey, ifpart, and
 448 // elsepart and returns what's left after the option is removed.
 449 // if there's an error, it returns format itself.
 450 docstring parseOptions(docstring const & format, string & optkey,
 451                     docstring & ifpart, docstring & elsepart)
 452 {
 453         LASSERT(format[0] == '{' && format[1] == '%', return format);
 454         // strip '{%'
 455         docstring fmt = format.substr(2);
 456         size_t pos = fmt.find('%'); // end of key
 457         if (pos == string::npos) {
 458                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 459                 return format;
 460         }
 461         optkey = to_utf8(fmt.substr(0, pos));
 462         fmt = fmt.substr(pos + 1);
 463         // [[format]] should be next
 464         if (fmt[0] != '[' || fmt[1] != '[') {
 465                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 466                 return format;
 467         }
 468
 469         docstring curfmt = fmt;
 470         fmt = getClause(curfmt, ifpart);
 471         if (fmt == curfmt) {
 472                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 473                 return format;
 474         }
 475
 476         if (fmt[0] == '}') // we're done, no else clause
 477                 return fmt.substr(1);
 478
 479         // else part should follow
 480         if (fmt[0] != '[' || fmt[1] != '[') {
 481                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 482                 return format;
 483         }
 484
 485         curfmt = fmt;
 486         fmt = getClause(curfmt, elsepart);
 487         // we should be done
 488         if (fmt == curfmt || fmt[0] != '}') {
 489                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 490                 return format;
 491         }
 492         return fmt.substr(1);
 493 }
 494
 495
 496 } // anon namespace
 497
 498 /* FIXME
 499 Bug #9131 revealed an oddity in how we are generating citation information
 500 when more than one key is given. We end up building a longer and longer format
 501 string as we go, which we then have to re-parse, over and over and over again,
 502 rather than generating the information for the individual keys and then putting
 503 all of that together. We do that to deal with the way separators work, from what
 504 I can tell, but it still feels like a hack. Fixing this would require quite a
 505 bit of work, however.
 506 */
 507 docstring BibTeXInfo::expandFormat(docstring const & format,
 508                 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
 509                 CiteItem const & ci, bool next, bool second) const
 510 {
 511         // incorrect use of macros could put us in an infinite loop
 512         static int const max_passes = 5000;
 513         // the use of overly large keys can lead to performance problems, due
 514         // to eventual attempts to convert LaTeX macros to unicode. See bug
 515         // #8944. This is perhaps not the best solution, but it will have to
 516         // do for now.
 517         static size_t const max_keysize = 128;
 518         odocstringstream ret; // return value
 519         string key;
 520         bool scanning_key = false;
 521         bool scanning_rich = false;
 522
 523         CiteEngineType const engine_type = buf.params().citeEngineType();
 524         docstring fmt = format;
 525         // we'll remove characters from the front of fmt as we
 526         // deal with them
 527         while (!fmt.empty()) {
 528                 if (counter > max_passes) {
 529                         LYXERR0("Recursion limit reached while parsing `"
 530                                 << format << "'.");
 531                         return _("ERROR!");
 532                 }
 533
 534                 char_type thischar = fmt[0];
 535                 if (thischar == '%') {
 536                         // beginning or end of key
 537                         if (scanning_key) {
 538                                 // end of key
 539                                 scanning_key = false;
 540                                 // so we replace the key with its value, which may be empty
 541                                 if (key[0] == '!') {
 542                                         // macro
 543                                         string const val =
 544                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 545                                         fmt = from_utf8(val) + fmt.substr(1);
 546                                         counter += 1;
 547                                         continue;
 548                                 } else if (key[0] == '_') {
 549                                         // a translatable bit
 550                                         string const val =
 551                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 552                                         docstring const trans =
 553                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 554                                         ret << trans;
 555                                 } else {
 556                                         docstring const val =
 557                                                 getValueForKey(key, buf, ci, xrefs, max_keysize);
 558                                         if (!scanning_rich)
 559                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 560                                         ret << val;
 561                                         if (!scanning_rich)
 562                                                 ret << from_ascii("{!</span>!}");
 563                                 }
 564                         } else {
 565                                 // beginning of key
 566                                 key.clear();
 567                                 scanning_key = true;
 568                         }
 569                 }
 570                 else if (thischar == '{') {
 571                         // beginning of option?
 572                         if (scanning_key) {
 573                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 574                                 return _("ERROR!");
 575                         }
 576                         if (fmt.size() > 1) {
 577                                 if (fmt[1] == '%') {
 578                                         // it is the beginning of an optional format
 579                                         string optkey;
 580                                         docstring ifpart;
 581                                         docstring elsepart;
 582                                         docstring const newfmt =
 583                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 584                                         if (newfmt == fmt) // parse error
 585                                                 return _("ERROR!");
 586                                         fmt = newfmt;
 587                                         docstring const val =
 588                                                 getValueForKey(optkey, buf, ci, xrefs);
 589                                         if (optkey == "next" && next)
 590                                                 ret << ifpart; // without expansion
 591                                         else if (optkey == "second" && second) {
 592                                                 int newcounter = 0;
 593                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 594                                                         ci, next);
 595                                         } else if (!val.empty()) {
 596                                                 int newcounter = 0;
 597                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 598                                                         ci, next);
 599                                         } else if (!elsepart.empty()) {
 600                                                 int newcounter = 0;
 601                                                 ret << expandFormat(elsepart, xrefs, newcounter, buf,
 602                                                         ci, next);
 603                                         }
 604                                         // fmt will have been shortened for us already
 605                                         continue;
 606                                 }
 607                                 if (fmt[1] == '!') {
 608                                         // beginning of rich text
 609                                         scanning_rich = true;
 610                                         fmt = fmt.substr(2);
 611                                         ret << from_ascii("{!");
 612                                         continue;
 613                                 }
 614                         }
 615                         // we are here if '{' was not followed by % or !.
 616                         // So it's just a character.
 617                         ret << thischar;
 618                 }
 619                 else if (scanning_rich && thischar == '!'
 620                          && fmt.size() > 1 && fmt[1] == '}') {
 621                         // end of rich text
 622                         scanning_rich = false;
 623                         fmt = fmt.substr(2);
 624                         ret << from_ascii("!}");
 625                         continue;
 626                 }
 627                 else if (scanning_key)
 628                         key += char(thischar);
 629                 else {
 630                         try {
 631                                 ret.put(thischar);
 632                         } catch (EncodingException & /* e */) {
 633                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 634                         }
 635                 }
 636                 fmt = fmt.substr(1);
 637         } // for loop
 638         if (scanning_key) {
 639                 LYXERR0("Never found end of key in `" << format << "'!");
 640                 return _("ERROR!");
 641         }
 642         if (scanning_rich) {
 643                 LYXERR0("Never found end of rich text in `" << format << "'!");
 644                 return _("ERROR!");
 645         }
 646         return ret.str();
 647 }
 648
 649
 650 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 651         Buffer const & buf, CiteItem const & ci) const
 652 {
 653         bool const richtext = ci.richtext;
 654
 655         if (!richtext && !info_.empty())
 656                 return info_;
 657         if (richtext && !info_richtext_.empty())
 658                 return info_richtext_;
 659
 660         if (!is_bibtex_) {
 661                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 662                 info_ = it->second;
 663                 return info_;
 664         }
 665
 666         CiteEngineType const engine_type = buf.params().citeEngineType();
 667         DocumentClass const & dc = buf.params().documentClass();
 668         docstring const & format =
 669                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 670         int counter = 0;
 671         info_ = expandFormat(format, xrefs, counter, buf,
 672                 ci, false, false);
 673
 674         if (info_.empty()) {
 675                 // this probably shouldn't happen
 676                 return info_;
 677         }
 678
 679         if (richtext) {
 680                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 681                 return info_richtext_;
 682         }
 683
 684         info_ = convertLaTeXCommands(processRichtext(info_, false));
 685         return info_;
 686 }
 687
 688
 689 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 690         Buffer const & buf, docstring const & format,
 691         CiteItem const & ci, bool next, bool second) const
 692 {
 693         docstring loclabel;
 694
 695         int counter = 0;
 696         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 697
 698         if (!loclabel.empty() && !next) {
 699                 loclabel = processRichtext(loclabel, ci.richtext);
 700                 loclabel = convertLaTeXCommands(loclabel);
 701         }
 702
 703         return loclabel;
 704 }
 705
 706
 707 docstring const & BibTeXInfo::operator[](docstring const & field) const
 708 {
 709         BibTeXInfo::const_iterator it = find(field);
 710         if (it != end())
 711                 return it->second;
 712         static docstring const empty_value = docstring();
 713         return empty_value;
 714 }
 715
 716
 717 docstring const & BibTeXInfo::operator[](string const & field) const
 718 {
 719         return operator[](from_ascii(field));
 720 }
 721
 722
 723 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 724         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 725 {
 726         // anything less is pointless
 727         LASSERT(maxsize >= 16, maxsize = 16);
 728         string key = oldkey;
 729         bool cleanit = false;
 730         if (prefixIs(oldkey, "clean:")) {
 731                 key = oldkey.substr(6);
 732                 cleanit = true;
 733         }
 734
 735         docstring ret = operator[](key);
 736         if (ret.empty() && !xrefs.empty()) {
 737                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 738                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 739                 for (; it != en; ++it) {
 740                         if (*it && !(**it)[key].empty()) {
 741                                 ret = (**it)[key];
 742                                 break;
 743                         }
 744                 }
 745         }
 746         if (ret.empty()) {
 747                 // some special keys
 748                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 749                 if (key == "dialog" && ci.context == CiteItem::Dialog)
 750                         ret = from_ascii("x"); // any non-empty string will do
 751                 else if (key == "export" && ci.context == CiteItem::Export)
 752                         ret = from_ascii("x"); // any non-empty string will do
 753                 else if (key == "ifstar" && ci.Starred)
 754                         ret = from_ascii("x"); // any non-empty string will do
 755                 else if (key == "entrytype")
 756                         ret = entry_type_;
 757                 else if (prefixIs(key, "ifentrytype:")
 758                          && from_ascii(key.substr(12)) == entry_type_)
 759                         ret = from_ascii("x"); // any non-empty string will do
 760                 else if (key == "key")
 761                         ret = bib_key_;
 762                 else if (key == "label")
 763                         ret = label_;
 764                 else if (key == "modifier" && modifier_ != 0)
 765                         ret = modifier_;
 766                 else if (key == "numericallabel")
 767                         ret = cite_number_;
 768                 else if (key == "abbrvauthor") {
 769                         // Special key to provide abbreviated author names,
 770                         // with respect to maxcitenames.
 771                         ret = getAuthorList(&buf, false, false);
 772                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 773                                 ret[0] = uppercase(ret[0]);
 774                 } else if (key == "fullauthor") {
 775                         // Return a full author list
 776                         ret = getAuthorList(&buf, true, false);
 777                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 778                                 ret[0] = uppercase(ret[0]);
 779                 } else if (key == "forceabbrvauthor") {
 780                         // Special key to provide abbreviated author names,
 781                         // irrespective of maxcitenames.
 782                         ret = getAuthorList(&buf, false, true);
 783                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 784                                 ret[0] = uppercase(ret[0]);
 785                 } else if (key == "bibentry") {
 786                         // Special key to provide the full bibliography entry: see getInfo()
 787                         CiteEngineType const engine_type = buf.params().citeEngineType();
 788                         DocumentClass const & dc = buf.params().documentClass();
 789                         docstring const & format =
 790                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 791                         int counter = 0;
 792                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
 793                 } else if (key == "textbefore")
 794                         ret = ci.textBefore;
 795                 else if (key == "textafter")
 796                         ret = ci.textAfter;
 797                 else if (key == "year")
 798                         ret = getYear();
 799         }
 800
 801         if (cleanit)
 802                 ret = html::cleanAttr(ret);
 803
 804         // make sure it is not too big
 805         support::truncateWithEllipsis(ret, maxsize);
 806         return ret;
 807 }
 808
 809
 810 //////////////////////////////////////////////////////////////////////
 811 //
 812 // BiblioInfo
 813 //
 814 //////////////////////////////////////////////////////////////////////
 815
 816 namespace {
 817
 818 // A functor for use with sort, leading to case insensitive sorting
 819 class compareNoCase: public binary_function<docstring, docstring, bool>
 820 {
 821 public:
 822         bool operator()(docstring const & s1, docstring const & s2) const {
 823                 return compare_no_case(s1, s2) < 0;
 824         }
 825 };
 826
 827 } // namespace anon
 828
 829
 830 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
 831 {
 832         vector<docstring> result;
 833         if (!data.isBibTeX())
 834                 return result;
 835         // Legacy crossref field. This is not nestable.
 836         if (!nested && !data["crossref"].empty()) {
 837                 docstring const xrefkey = data["crossref"];
 838                 result.push_back(xrefkey);
 839                 // However, check for nested xdatas
 840                 BiblioInfo::const_iterator it = find(xrefkey);
 841                 if (it != end()) {
 842                         BibTeXInfo const & xref = it->second;
 843                         vector<docstring> const nxdata = getXRefs(xref, true);
 844                         if (!nxdata.empty())
 845                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
 846                 }
 847         }
 848         // Biblatex's xdata field. Infinitely nestable.
 849         // XData field can consist of a comma-separated list of keys
 850         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
 851         if (!xdatakeys.empty()) {
 852                 vector<docstring>::const_iterator xit = xdatakeys.begin();
 853                 vector<docstring>::const_iterator xen = xdatakeys.end();
 854                 for (; xit != xen; ++xit) {
 855                         docstring const xdatakey = *xit;
 856                         result.push_back(xdatakey);
 857                         BiblioInfo::const_iterator it = find(xdatakey);
 858                         if (it != end()) {
 859                                 BibTeXInfo const & xdata = it->second;
 860                                 vector<docstring> const nxdata = getXRefs(xdata, true);
 861                                 if (!nxdata.empty())
 862                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
 863                         }
 864                 }
 865         }
 866         return result;
 867 }
 868
 869
 870 vector<docstring> const BiblioInfo::getKeys() const
 871 {
 872         vector<docstring> bibkeys;
 873         BiblioInfo::const_iterator it  = begin();
 874         for (; it != end(); ++it)
 875                 bibkeys.push_back(it->first);
 876         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 877         return bibkeys;
 878 }
 879
 880
 881 vector<docstring> const BiblioInfo::getFields() const
 882 {
 883         vector<docstring> bibfields;
 884         set<docstring>::const_iterator it = field_names_.begin();
 885         set<docstring>::const_iterator end = field_names_.end();
 886         for (; it != end; ++it)
 887                 bibfields.push_back(*it);
 888         sort(bibfields.begin(), bibfields.end());
 889         return bibfields;
 890 }
 891
 892
 893 vector<docstring> const BiblioInfo::getEntries() const
 894 {
 895         vector<docstring> bibentries;
 896         set<docstring>::const_iterator it = entry_types_.begin();
 897         set<docstring>::const_iterator end = entry_types_.end();
 898         for (; it != end; ++it)
 899                 bibentries.push_back(*it);
 900         sort(bibentries.begin(), bibentries.end());
 901         return bibentries;
 902 }
 903
 904
 905 docstring const BiblioInfo::getAuthorList(docstring const & key, Buffer const & buf) const
 906 {
 907         BiblioInfo::const_iterator it = find(key);
 908         if (it == end())
 909                 return docstring();
 910         BibTeXInfo const & data = it->second;
 911         return data.getAuthorList(&buf, false);
 912 }
 913
 914
 915 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 916 {
 917         BiblioInfo::const_iterator it = find(key);
 918         if (it == end())
 919                 return docstring();
 920         BibTeXInfo const & data = it->second;
 921         return data.citeNumber();
 922 }
 923
 924
 925 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 926 {
 927         BiblioInfo::const_iterator it = find(key);
 928         if (it == end())
 929                 return docstring();
 930         BibTeXInfo const & data = it->second;
 931         docstring year = data.getYear();
 932         if (year.empty()) {
 933                 // let's try the crossrefs
 934                 vector<docstring> const xrefs = getXRefs(data);
 935                 if (xrefs.empty())
 936                         // no luck
 937                         return docstring();
 938                 vector<docstring>::const_iterator it = xrefs.begin();
 939                 vector<docstring>::const_iterator en = xrefs.end();
 940                 for (; it != en; ++it) {
 941                         BiblioInfo::const_iterator const xrefit = find(*it);
 942                         if (xrefit == end())
 943                                 continue;
 944                         BibTeXInfo const & xref_data = xrefit->second;
 945                         year = xref_data.getYear();
 946                         if (!year.empty())
 947                                 // success!
 948                                 break;
 949                 }
 950         }
 951         if (use_modifier && data.modifier() != 0)
 952                 year += data.modifier();
 953         return year;
 954 }
 955
 956
 957 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 958 {
 959         docstring const year = getYear(key, use_modifier);
 960         if (year.empty())
 961                 return buf.B_("No year");
 962         return year;
 963 }
 964
 965
 966 docstring const BiblioInfo::getInfo(docstring const & key,
 967         Buffer const & buf, CiteItem const & ci) const
 968 {
 969         BiblioInfo::const_iterator it = find(key);
 970         if (it == end())
 971                 return docstring(_("Bibliography entry not found!"));
 972         BibTeXInfo const & data = it->second;
 973         BibTeXInfoList xrefptrs;
 974         vector<docstring> const xrefs = getXRefs(data);
 975         if (!xrefs.empty()) {
 976                 vector<docstring>::const_iterator it = xrefs.begin();
 977                 vector<docstring>::const_iterator en = xrefs.end();
 978                 for (; it != en; ++it) {
 979                         BiblioInfo::const_iterator const xrefit = find(*it);
 980                         if (xrefit != end())
 981                                 xrefptrs.push_back(&(xrefit->second));
 982                 }
 983         }
 984         return data.getInfo(xrefptrs, buf, ci);
 985 }
 986
 987
 988 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 989         Buffer const & buf, string const & style, CiteItem const & ci) const
 990 {
 991         size_t max_size = ci.max_size;
 992         // shorter makes no sense
 993         LASSERT(max_size >= 16, max_size = 16);
 994
 995         // we can't display more than 10 of these, anyway
 996         bool const too_many_keys = keys.size() > 10;
 997         if (too_many_keys)
 998                 keys.resize(10);
 999
1000         CiteEngineType const engine_type = buf.params().citeEngineType();
1001         DocumentClass const & dc = buf.params().documentClass();
1002         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
1003         docstring ret = format;
1004         vector<docstring>::const_iterator key = keys.begin();
1005         vector<docstring>::const_iterator ken = keys.end();
1006         for (int i = 0; key != ken; ++key, ++i) {
1007                 BiblioInfo::const_iterator it = find(*key);
1008                 BibTeXInfo empty_data;
1009                 empty_data.key(*key);
1010                 BibTeXInfo & data = empty_data;
1011                 vector<BibTeXInfo const *> xrefptrs;
1012                 if (it != end()) {
1013                         data = it->second;
1014                         vector<docstring> const xrefs = getXRefs(data);
1015                         if (!xrefs.empty()) {
1016                                 vector<docstring>::const_iterator it = xrefs.begin();
1017                                 vector<docstring>::const_iterator en = xrefs.end();
1018                                 for (; it != en; ++it) {
1019                                         BiblioInfo::const_iterator const xrefit = find(*it);
1020                                         if (xrefit != end())
1021                                                 xrefptrs.push_back(&(xrefit->second));
1022                                 }
1023                         }
1024                 }
1025                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1026         }
1027
1028         if (too_many_keys)
1029                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1030         support::truncateWithEllipsis(ret, max_size);
1031         return ret;
1032 }
1033
1034
1035 bool BiblioInfo::isBibtex(docstring const & key) const
1036 {
1037         docstring key1;
1038         split(key, key1, ',');
1039         BiblioInfo::const_iterator it = find(key1);
1040         if (it == end())
1041                 return false;
1042         return it->second.isBibTeX();
1043 }
1044
1045
1046 vector<docstring> const BiblioInfo::getCiteStrings(
1047         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1048         Buffer const & buf, CiteItem const & ci) const
1049 {
1050         if (empty())
1051                 return vector<docstring>();
1052
1053         string style;
1054         vector<docstring> vec(styles.size());
1055         for (size_t i = 0; i != vec.size(); ++i) {
1056                 style = styles[i].name;
1057                 vec[i] = getLabel(keys, buf, style, ci);
1058         }
1059
1060         return vec;
1061 }
1062
1063
1064 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1065 {
1066         bimap_.insert(info.begin(), info.end());
1067         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1068         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1069 }
1070
1071
1072 namespace {
1073
1074 // used in xhtml to sort a list of BibTeXInfo objects
1075 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1076 {
1077         docstring const lauth = lhs->getAuthorList();
1078         docstring const rauth = rhs->getAuthorList();
1079         docstring const lyear = lhs->getYear();
1080         docstring const ryear = rhs->getYear();
1081         docstring const ltitl = lhs->operator[]("title");
1082         docstring const rtitl = rhs->operator[]("title");
1083         return  (lauth < rauth)
1084                 || (lauth == rauth && lyear < ryear)
1085                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1086 }
1087
1088 }
1089
1090
1091 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1092 {
1093         cited_entries_.clear();
1094         // We are going to collect all the citation keys used in the document,
1095         // getting them from the TOC.
1096         // FIXME We may want to collect these differently, in the first case,
1097         // so that we might have them in order of appearance.
1098         set<docstring> citekeys;
1099         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1100         Toc::const_iterator it = toc->begin();
1101         Toc::const_iterator const en = toc->end();
1102         for (; it != en; ++it) {
1103                 if (it->str().empty())
1104                         continue;
1105                 vector<docstring> const keys = getVectorFromString(it->str());
1106                 citekeys.insert(keys.begin(), keys.end());
1107         }
1108         if (citekeys.empty())
1109                 return;
1110
1111         // We have a set of the keys used in this document.
1112         // We will now convert it to a list of the BibTeXInfo objects used in
1113         // this document...
1114         vector<BibTeXInfo const *> bi;
1115         set<docstring>::const_iterator cit = citekeys.begin();
1116         set<docstring>::const_iterator const cen = citekeys.end();
1117         for (; cit != cen; ++cit) {
1118                 BiblioInfo::const_iterator const bt = find(*cit);
1119                 if (bt == end() || !bt->second.isBibTeX())
1120                         continue;
1121                 bi.push_back(&(bt->second));
1122         }
1123         // ...and sort it.
1124         sort(bi.begin(), bi.end(), lSorter);
1125
1126         // Now we can write the sorted keys
1127         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1128         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1129         for (; bit != ben; ++bit)
1130                 cited_entries_.push_back((*bit)->key());
1131 }
1132
1133
1134 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1135 {
1136         collectCitedEntries(buf);
1137         CiteEngineType const engine_type = buf.params().citeEngineType();
1138         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1139
1140         int keynumber = 0;
1141         char modifier = 0;
1142         // used to remember the last one we saw
1143         // we'll be comparing entries to see if we need to add
1144         // modifiers, like "1984a"
1145         map<docstring, BibTeXInfo>::iterator last;
1146
1147         vector<docstring>::const_iterator it = cited_entries_.begin();
1148         vector<docstring>::const_iterator const en = cited_entries_.end();
1149         for (; it != en; ++it) {
1150                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1151                 // this shouldn't happen, but...
1152                 if (biit == bimap_.end())
1153                         // ...fail gracefully, anyway.
1154                         continue;
1155                 BibTeXInfo & entry = biit->second;
1156                 if (numbers) {
1157                         docstring const num = convert<docstring>(++keynumber);
1158                         entry.setCiteNumber(num);
1159                 } else {
1160                         // coverity complains about our derefercing the iterator last,
1161                         // which was not initialized above. but it does get initialized
1162                         // after the first time through the loop, which is the point of
1163                         // the first test.
1164                         // coverity[FORWARD_NULL]
1165                         if (it != cited_entries_.begin()
1166                             && entry.getAuthorList() == last->second.getAuthorList()
1167                             // we access the year via getYear() so as to get it from the xref,
1168                             // if we need to do so
1169                             && getYear(entry.key()) == getYear(last->second.key())) {
1170                                 if (modifier == 0) {
1171                                         // so the last one should have been 'a'
1172                                         last->second.setModifier('a');
1173                                         modifier = 'b';
1174                                 } else if (modifier == 'z')
1175                                         modifier = 'A';
1176                                 else
1177                                         modifier++;
1178                         } else {
1179                                 modifier = 0;
1180                         }
1181                         entry.setModifier(modifier);
1182                         // remember the last one
1183                         last = biit;
1184                 }
1185         }
1186         // Set the labels
1187         it = cited_entries_.begin();
1188         for (; it != en; ++it) {
1189                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1190                 // this shouldn't happen, but...
1191                 if (biit == bimap_.end())
1192                         // ...fail gracefully, anyway.
1193                         continue;
1194                 BibTeXInfo & entry = biit->second;
1195                 if (numbers) {
1196                         entry.label(entry.citeNumber());
1197                 } else {
1198                         docstring const auth = entry.getAuthorList(&buf, false);
1199                         // we do it this way so as to access the xref, if necessary
1200                         // note that this also gives us the modifier
1201                         docstring const year = getYear(*it, buf, true);
1202                         if (!auth.empty() && !year.empty())
1203                                 entry.label(auth + ' ' + year);
1204                         else
1205                                 entry.label(entry.key());
1206                 }
1207         }
1208 }
1209
1210
1211 //////////////////////////////////////////////////////////////////////
1212 //
1213 // CitationStyle
1214 //
1215 //////////////////////////////////////////////////////////////////////
1216
1217
1218 CitationStyle citationStyleFromString(string const & command,
1219                                       BufferParams const & params)
1220 {
1221         CitationStyle cs;
1222         if (command.empty())
1223                 return cs;
1224
1225         string const alias = params.getCiteAlias(command);
1226         string cmd = alias.empty() ? command : alias;
1227         if (isUpperCase(command[0])) {
1228                 cs.forceUpperCase = true;
1229                 cmd[0] = lowercase(cmd[0]);
1230         }
1231
1232         size_t const n = command.size() - 1;
1233         if (command[n] == '*') {
1234                 cs.hasStarredVersion = true;
1235                 if (suffixIs(cmd, '*'))
1236                         cmd = cmd.substr(0, cmd.size() - 1);
1237         }
1238
1239         cs.name = cmd;
1240         return cs;
1241 }
1242
1243
1244 string citationStyleToString(const CitationStyle & cs, bool const latex)
1245 {
1246         string cmd = latex ? cs.cmd : cs.name;
1247         if (cs.forceUpperCase)
1248                 cmd[0] = uppercase(cmd[0]);
1249         if (cs.hasStarredVersion)
1250                 cmd += '*';
1251         return cmd;
1252 }
1253
1254 } // namespace lyx