src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Citation.h"
  21 #include "Encoding.h"
  22 #include "InsetIterator.h"
  23 #include "Language.h"
  24 #include "output_xhtml.h"
  25 #include "Paragraph.h"
  26 #include "TextClass.h"
  27 #include "TocBackend.h"
  28
  29 #include "support/convert.h"
  30 #include "support/debug.h"
  31 #include "support/docstream.h"
  32 #include "support/gettext.h"
  33 #include "support/lassert.h"
  34 #include "support/lstrings.h"
  35 #include "support/regex.h"
  36 #include "support/textutils.h"
  37
  38 #include <set>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43
  44 namespace lyx {
  45
  46 namespace {
  47
  48 // gets the "family name" from an author-type string
  49 docstring familyName(docstring const & name)
  50 {
  51         if (name.empty())
  52                 return docstring();
  53
  54         // first we look for a comma, and take the last name to be everything
  55         // preceding the right-most one, so that we also get the "jr" part.
  56         docstring::size_type idx = name.rfind(',');
  57         if (idx != docstring::npos)
  58                 return ltrim(name.substr(0, idx));
  59
  60         // OK, so now we want to look for the last name. We're going to
  61         // include the "von" part. This isn't perfect.
  62         // Split on spaces, to get various tokens.
  63         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  64         // If we only get two, assume the last one is the last name
  65         if (pieces.size() <= 2)
  66                 return pieces.back();
  67
  68         // Now we look for the first token that begins with a lower case letter.
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator en = pieces.end();
  71         for (; it != en; ++it) {
  72                 if ((*it).empty())
  73                         continue;
  74                 char_type const c = (*it)[0];
  75                 if (isLower(c))
  76                         break;
  77         }
  78
  79         if (it == en) // we never found a "von"
  80                 return pieces.back();
  81
  82         // reconstruct what we need to return
  83         docstring retval;
  84         bool first = true;
  85         for (; it != en; ++it) {
  86                 if (!first)
  87                         retval += " ";
  88                 else
  89                         first = false;
  90                 retval += *it;
  91         }
  92         return retval;
  93 }
  94
  95
  96 // converts a string containing LaTeX commands into unicode
  97 // for display.
  98 docstring convertLaTeXCommands(docstring const & str)
  99 {
 100         docstring val = str;
 101         docstring ret;
 102
 103         bool scanning_cmd = false;
 104         bool scanning_math = false;
 105         bool escaped = false; // used to catch \$, etc.
 106         while (!val.empty()) {
 107                 char_type const ch = val[0];
 108
 109                 // if we're scanning math, we output everything until we
 110                 // find an unescaped $, at which point we break out.
 111                 if (scanning_math) {
 112                         if (escaped)
 113                                 escaped = false;
 114                         else if (ch == '\\')
 115                                 escaped = true;
 116                         else if (ch == '$')
 117                                 scanning_math = false;
 118                         ret += ch;
 119                         val = val.substr(1);
 120                         continue;
 121                 }
 122
 123                 // if we're scanning a command name, then we just
 124                 // discard characters until we hit something that
 125                 // isn't alpha.
 126                 if (scanning_cmd) {
 127                         if (isAlphaASCII(ch)) {
 128                                 val = val.substr(1);
 129                                 escaped = false;
 130                                 continue;
 131                         }
 132                         // so we're done with this command.
 133                         // now we fall through and check this character.
 134                         scanning_cmd = false;
 135                 }
 136
 137                 // was the last character a \? If so, then this is something like:
 138                 // \\ or \$, so we'll just output it. That's probably not always right...
 139                 if (escaped) {
 140                         // exception: output \, as THIN SPACE
 141                         if (ch == ',')
 142                                 ret.push_back(0x2009);
 143                         else
 144                                 ret += ch;
 145                         val = val.substr(1);
 146                         escaped = false;
 147                         continue;
 148                 }
 149
 150                 if (ch == '$') {
 151                         ret += ch;
 152                         val = val.substr(1);
 153                         scanning_math = true;
 154                         continue;
 155                 }
 156
 157                 // we just ignore braces
 158                 if (ch == '{' || ch == '}') {
 159                         val = val.substr(1);
 160                         continue;
 161                 }
 162
 163                 // we're going to check things that look like commands, so if
 164                 // this doesn't, just output it.
 165                 if (ch != '\\') {
 166                         ret += ch;
 167                         val = val.substr(1);
 168                         continue;
 169                 }
 170
 171                 // ok, could be a command of some sort
 172                 // let's see if it corresponds to some unicode
 173                 // unicodesymbols has things in the form: \"{u},
 174                 // whereas we may see things like: \"u. So we'll
 175                 // look for that and change it, if necessary.
 176                 // FIXME: This is a sort of mini-tex2lyx.
 177                 //        Use the real tex2lyx instead!
 178                 static lyx::regex const reg("^\\\\\\W\\w");
 179                 if (lyx::regex_search(to_utf8(val), reg)) {
 180                         val.insert(3, from_ascii("}"));
 181                         val.insert(2, from_ascii("{"));
 182                 }
 183                 bool termination;
 184                 docstring rem;
 185                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 186                                 Encodings::TEXT_CMD, termination, rem);
 187                 if (!cnvtd.empty()) {
 188                         // it did, so we'll take that bit and proceed with what's left
 189                         ret += cnvtd;
 190                         val = rem;
 191                         continue;
 192                 }
 193                 // it's a command of some sort
 194                 scanning_cmd = true;
 195                 escaped = true;
 196                 val = val.substr(1);
 197         }
 198         return ret;
 199 }
 200
 201
 202 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 203 docstring processRichtext(docstring const & str, bool richtext)
 204 {
 205         docstring val = str;
 206         docstring ret;
 207
 208         bool scanning_rich = false;
 209         while (!val.empty()) {
 210                 char_type const ch = val[0];
 211                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 212                         // beginning of rich text
 213                         scanning_rich = true;
 214                         val = val.substr(2);
 215                         continue;
 216                 }
 217                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 218                         // end of rich text
 219                         scanning_rich = false;
 220                         val = val.substr(2);
 221                         continue;
 222                 }
 223                 if (richtext) {
 224                         if (scanning_rich)
 225                                 ret += ch;
 226                         else {
 227                                 // we need to escape '<' and '>'
 228                                 if (ch == '<')
 229                                         ret += "&lt;";
 230                                 else if (ch == '>')
 231                                         ret += "&gt;";
 232                                 else
 233                                         ret += ch;
 234                         }
 235                 } else if (!scanning_rich /* && !richtext */)
 236                         ret += ch;
 237                 // else the character is discarded, which will happen only if
 238                 // richtext == false and we are scanning rich text
 239                 val = val.substr(1);
 240         }
 241         return ret;
 242 }
 243
 244 } // anon namespace
 245
 246
 247 //////////////////////////////////////////////////////////////////////
 248 //
 249 // BibTeXInfo
 250 //
 251 //////////////////////////////////////////////////////////////////////
 252
 253 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 254         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 255           modifier_(0)
 256 {}
 257
 258
 259 docstring const BibTeXInfo::getAuthorList(
 260     Buffer const * buf, bool full, bool forceshort) const
 261 {
 262         // Maxnames treshold depend on engine
 263         size_t maxnames = buf ?
 264                 buf->params().documentClass().max_citenames() : 2;
 265
 266         if (!is_bibtex_) {
 267                 docstring const opt = label();
 268                 if (opt.empty())
 269                         return docstring();
 270
 271                 docstring authors;
 272                 docstring const remainder = trim(split(opt, authors, '('));
 273                 if (remainder.empty())
 274                         // in this case, we didn't find a "(",
 275                         // so we don't have author (year)
 276                         return docstring();
 277                 return authors;
 278         }
 279
 280         docstring author = operator[]("author");
 281         if (author.empty()) {
 282                 author = operator[]("editor");
 283                 if (author.empty())
 284                         return author;
 285         }
 286
 287         // FIXME Move this to a separate routine that can
 288         // be called from elsewhere.
 289         //
 290         // OK, we've got some names. Let's format them.
 291         // Try to split the author list on " and "
 292         vector<docstring> const authors =
 293                 getVectorFromString(author, from_ascii(" and "));
 294
 295         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 296                 docstring shortauthor = familyName(authors[0])
 297                         + "/" + familyName(authors[1]);
 298                 if (authors.size() == 3)
 299                         shortauthor += "/" + familyName(authors[2]);
 300                 return convertLaTeXCommands(shortauthor);
 301         }
 302         docstring retval;
 303
 304         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 305                                                : ENGINE_TYPE_DEFAULT;
 306
 307         // These are defined in the styles
 308         string const etal =
 309                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
 310                     : " et al.";
 311         string const namesep =
 312                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
 313                    : ", ";
 314         string const lastnamesep =
 315                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
 316                     : ", and ";
 317         string const pairnamesep =
 318                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
 319                      : " and ";
 320
 321         // Shorten the list (with et al.) if forceshort is set
 322         // and the list can actually be shorten, else if maxcitenames
 323         // is passed and full is not set.
 324         bool shorten = forceshort && authors.size() > 1;
 325         vector<docstring>::const_iterator it = authors.begin();
 326         vector<docstring>::const_iterator en = authors.end();
 327         for (size_t i = 0; it != en; ++it, ++i) {
 328                 if (i >= maxnames && !full) {
 329                         shorten = true;
 330                         break;
 331                 }
 332                 if (*it == "others") {
 333                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 334                         break;
 335                 }
 336                 if (i > 0 && i == authors.size() - 1) {
 337                         if (authors.size() == 2)
 338                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 339                         else
 340                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 341                 } else if (i > 0)
 342                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 343                 retval += familyName(*it);
 344         }
 345         if (shorten)
 346                 retval = familyName(authors[0]) + (buf ? buf->B_(etal) : from_ascii(etal));
 347
 348         return convertLaTeXCommands(retval);
 349 }
 350
 351
 352 docstring const BibTeXInfo::getYear() const
 353 {
 354         if (is_bibtex_) {
 355                 // first try legacy year field
 356                 docstring year = operator[]("year");
 357                 if (!year.empty())
 358                         return year;
 359                 // now try biblatex's date field
 360                 year = operator[]("date");
 361                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 362                 // We only want the years.
 363                 static regex const yreg("[-]?([\\d]{4}).*");
 364                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 365                 smatch sm;
 366                 string const date = to_utf8(year);
 367                 regex_match(date, sm, yreg);
 368                 year = from_ascii(sm[1]);
 369                 // check for an endyear
 370                 if (regex_match(date, sm, ereg))
 371                         year += char_type(0x2013) + from_ascii(sm[1]);
 372                 return year;
 373         }
 374
 375         docstring const opt = label();
 376         if (opt.empty())
 377                 return docstring();
 378
 379         docstring authors;
 380         docstring tmp = split(opt, authors, '(');
 381         if (tmp.empty())
 382                 // we don't have author (year)
 383                 return docstring();
 384         docstring year;
 385         tmp = split(tmp, year, ')');
 386         return year;
 387 }
 388
 389
 390 namespace {
 391
 392 docstring parseOptions(docstring const & format, string & optkey,
 393                     docstring & ifpart, docstring & elsepart);
 394
 395 // Calls parseOptions to deal with an embedded option, such as:
 396 //   {%number%[[, no.~%number%]]}
 397 // which must appear at the start of format. ifelsepart gets the
 398 // whole of the option, and we return what's left after the option.
 399 // we return format if there is an error.
 400 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 401 {
 402         LASSERT(format[0] == '{' && format[1] == '%', return format);
 403         string optkey;
 404         docstring ifpart;
 405         docstring elsepart;
 406         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 407         if (format == rest) { // parse error
 408                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 409                 return format;
 410         }
 411         LASSERT(rest.size() <= format.size(),
 412                 { ifelsepart = docstring(); return format; });
 413         ifelsepart = format.substr(0, format.size() - rest.size());
 414         return rest;
 415 }
 416
 417
 418 // Gets a "clause" from a format string, where the clause is
 419 // delimited by '[[' and ']]'. Returns what is left after the
 420 // clause is removed, and returns format if there is an error.
 421 docstring getClause(docstring const & format, docstring & clause)
 422 {
 423         docstring fmt = format;
 424         // remove '[['
 425         fmt = fmt.substr(2);
 426         // we'll remove characters from the front of fmt as we
 427         // deal with them
 428         while (!fmt.empty()) {
 429                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 430                         // that's the end
 431                         fmt = fmt.substr(2);
 432                         break;
 433                 }
 434                 // check for an embedded option
 435                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 436                         docstring part;
 437                         docstring const rest = parseEmbeddedOption(fmt, part);
 438                         if (fmt == rest) {
 439                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 440                                 return format;
 441                         }
 442                         clause += part;
 443                         fmt = rest;
 444                 } else { // it's just a normal character
 445                                 clause += fmt[0];
 446                                 fmt = fmt.substr(1);
 447                 }
 448         }
 449         return fmt;
 450 }
 451
 452
 453 // parse an options string, which must appear at the start of the
 454 // format parameter. puts the parsed bits in optkey, ifpart, and
 455 // elsepart and returns what's left after the option is removed.
 456 // if there's an error, it returns format itself.
 457 docstring parseOptions(docstring const & format, string & optkey,
 458                     docstring & ifpart, docstring & elsepart)
 459 {
 460         LASSERT(format[0] == '{' && format[1] == '%', return format);
 461         // strip '{%'
 462         docstring fmt = format.substr(2);
 463         size_t pos = fmt.find('%'); // end of key
 464         if (pos == string::npos) {
 465                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 466                 return format;
 467         }
 468         optkey = to_utf8(fmt.substr(0, pos));
 469         fmt = fmt.substr(pos + 1);
 470         // [[format]] should be next
 471         if (fmt[0] != '[' || fmt[1] != '[') {
 472                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 473                 return format;
 474         }
 475
 476         docstring curfmt = fmt;
 477         fmt = getClause(curfmt, ifpart);
 478         if (fmt == curfmt) {
 479                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 480                 return format;
 481         }
 482
 483         if (fmt[0] == '}') // we're done, no else clause
 484                 return fmt.substr(1);
 485
 486         // else part should follow
 487         if (fmt[0] != '[' || fmt[1] != '[') {
 488                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 489                 return format;
 490         }
 491
 492         curfmt = fmt;
 493         fmt = getClause(curfmt, elsepart);
 494         // we should be done
 495         if (fmt == curfmt || fmt[0] != '}') {
 496                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 497                 return format;
 498         }
 499         return fmt.substr(1);
 500 }
 501
 502
 503 } // anon namespace
 504
 505 /* FIXME
 506 Bug #9131 revealed an oddity in how we are generating citation information
 507 when more than one key is given. We end up building a longer and longer format
 508 string as we go, which we then have to re-parse, over and over and over again,
 509 rather than generating the information for the individual keys and then putting
 510 all of that together. We do that to deal with the way separators work, from what
 511 I can tell, but it still feels like a hack. Fixing this would require quite a
 512 bit of work, however.
 513 */
 514 docstring BibTeXInfo::expandFormat(docstring const & format,
 515                 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
 516                 CiteItem const & ci, bool next, bool second) const
 517 {
 518         // incorrect use of macros could put us in an infinite loop
 519         static int const max_passes = 5000;
 520         // the use of overly large keys can lead to performance problems, due
 521         // to eventual attempts to convert LaTeX macros to unicode. See bug
 522         // #8944. This is perhaps not the best solution, but it will have to
 523         // do for now.
 524         static size_t const max_keysize = 128;
 525         odocstringstream ret; // return value
 526         string key;
 527         bool scanning_key = false;
 528         bool scanning_rich = false;
 529
 530         CiteEngineType const engine_type = buf.params().citeEngineType();
 531         docstring fmt = format;
 532         // we'll remove characters from the front of fmt as we
 533         // deal with them
 534         while (!fmt.empty()) {
 535                 if (counter > max_passes) {
 536                         LYXERR0("Recursion limit reached while parsing `"
 537                                 << format << "'.");
 538                         return _("ERROR!");
 539                 }
 540
 541                 char_type thischar = fmt[0];
 542                 if (thischar == '%') {
 543                         // beginning or end of key
 544                         if (scanning_key) {
 545                                 // end of key
 546                                 scanning_key = false;
 547                                 // so we replace the key with its value, which may be empty
 548                                 if (key[0] == '!') {
 549                                         // macro
 550                                         string const val =
 551                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 552                                         fmt = from_utf8(val) + fmt.substr(1);
 553                                         counter += 1;
 554                                         continue;
 555                                 } else if (key[0] == '_') {
 556                                         // a translatable bit
 557                                         string const val =
 558                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 559                                         docstring const trans =
 560                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 561                                         ret << trans;
 562                                 } else {
 563                                         docstring const val =
 564                                                 getValueForKey(key, buf, ci, xrefs, max_keysize);
 565                                         if (!scanning_rich)
 566                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 567                                         ret << val;
 568                                         if (!scanning_rich)
 569                                                 ret << from_ascii("{!</span>!}");
 570                                 }
 571                         } else {
 572                                 // beginning of key
 573                                 key.clear();
 574                                 scanning_key = true;
 575                         }
 576                 }
 577                 else if (thischar == '{') {
 578                         // beginning of option?
 579                         if (scanning_key) {
 580                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 581                                 return _("ERROR!");
 582                         }
 583                         if (fmt.size() > 1) {
 584                                 if (fmt[1] == '%') {
 585                                         // it is the beginning of an optional format
 586                                         string optkey;
 587                                         docstring ifpart;
 588                                         docstring elsepart;
 589                                         docstring const newfmt =
 590                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 591                                         if (newfmt == fmt) // parse error
 592                                                 return _("ERROR!");
 593                                         fmt = newfmt;
 594                                         docstring const val =
 595                                                 getValueForKey(optkey, buf, ci, xrefs);
 596                                         if (optkey == "next" && next)
 597                                                 ret << ifpart; // without expansion
 598                                         else if (!val.empty()) {
 599                                                 int newcounter = 0;
 600                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 601                                                         ci, next);
 602                                         } else if (!elsepart.empty()) {
 603                                                 int newcounter = 0;
 604                                                 ret << expandFormat(elsepart, xrefs, newcounter, buf,
 605                                                         ci, next);
 606                                         }
 607                                         // fmt will have been shortened for us already
 608                                         continue;
 609                                 }
 610                                 if (fmt[1] == '!') {
 611                                         // beginning of rich text
 612                                         scanning_rich = true;
 613                                         fmt = fmt.substr(2);
 614                                         ret << from_ascii("{!");
 615                                         continue;
 616                                 }
 617                         }
 618                         // we are here if '{' was not followed by % or !.
 619                         // So it's just a character.
 620                         ret << thischar;
 621                 }
 622                 else if (scanning_rich && thischar == '!'
 623                          && fmt.size() > 1 && fmt[1] == '}') {
 624                         // end of rich text
 625                         scanning_rich = false;
 626                         fmt = fmt.substr(2);
 627                         ret << from_ascii("!}");
 628                         continue;
 629                 }
 630                 else if (scanning_key)
 631                         key += char(thischar);
 632                 else {
 633                         try {
 634                                 ret.put(thischar);
 635                         } catch (EncodingException & /* e */) {
 636                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 637                         }
 638                 }
 639                 fmt = fmt.substr(1);
 640         } // for loop
 641         if (scanning_key) {
 642                 LYXERR0("Never found end of key in `" << format << "'!");
 643                 return _("ERROR!");
 644         }
 645         if (scanning_rich) {
 646                 LYXERR0("Never found end of rich text in `" << format << "'!");
 647                 return _("ERROR!");
 648         }
 649         return ret.str();
 650 }
 651
 652
 653 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 654         Buffer const & buf, CiteItem const & ci) const
 655 {
 656         bool const richtext = ci.richtext;
 657
 658         if (!richtext && !info_.empty())
 659                 return info_;
 660         if (richtext && !info_richtext_.empty())
 661                 return info_richtext_;
 662
 663         if (!is_bibtex_) {
 664                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 665                 info_ = it->second;
 666                 return info_;
 667         }
 668
 669         CiteEngineType const engine_type = buf.params().citeEngineType();
 670         DocumentClass const & dc = buf.params().documentClass();
 671         docstring const & format =
 672                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 673         int counter = 0;
 674         info_ = expandFormat(format, xrefs, counter, buf,
 675                 ci, false, false);
 676
 677         if (info_.empty()) {
 678                 // this probably shouldn't happen
 679                 return info_;
 680         }
 681
 682         if (richtext) {
 683                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 684                 return info_richtext_;
 685         }
 686
 687         info_ = convertLaTeXCommands(processRichtext(info_, false));
 688         return info_;
 689 }
 690
 691
 692 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 693         Buffer const & buf, docstring const & format,
 694         CiteItem const & ci, bool next, bool second) const
 695 {
 696         docstring loclabel;
 697
 698         int counter = 0;
 699         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 700
 701         if (!loclabel.empty() && !next) {
 702                 loclabel = processRichtext(loclabel, ci.richtext);
 703                 loclabel = convertLaTeXCommands(loclabel);
 704         }
 705
 706         return loclabel;
 707 }
 708
 709
 710 docstring const & BibTeXInfo::operator[](docstring const & field) const
 711 {
 712         BibTeXInfo::const_iterator it = find(field);
 713         if (it != end())
 714                 return it->second;
 715         static docstring const empty_value = docstring();
 716         return empty_value;
 717 }
 718
 719
 720 docstring const & BibTeXInfo::operator[](string const & field) const
 721 {
 722         return operator[](from_ascii(field));
 723 }
 724
 725
 726 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 727         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 728 {
 729         // anything less is pointless
 730         LASSERT(maxsize >= 16, maxsize = 16);
 731         string key = oldkey;
 732         bool cleanit = false;
 733         if (prefixIs(oldkey, "clean:")) {
 734                 key = oldkey.substr(6);
 735                 cleanit = true;
 736         }
 737
 738         docstring ret = operator[](key);
 739         if (ret.empty() && !xrefs.empty()) {
 740                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 741                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 742                 for (; it != en; ++it) {
 743                         if (*it && !(**it)[key].empty()) {
 744                                 ret = (**it)[key];
 745                                 break;
 746                         }
 747                 }
 748         }
 749         if (ret.empty()) {
 750                 // some special keys
 751                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 752                 if (key == "dialog" && ci.context == CiteItem::Dialog)
 753                         ret = from_ascii("x"); // any non-empty string will do
 754                 else if (key == "ifstar" && ci.Starred)
 755                         ret = from_ascii("x"); // any non-empty string will do
 756                 else if (key == "entrytype")
 757                         ret = entry_type_;
 758                 else if (key == "key")
 759                         ret = bib_key_;
 760                 else if (key == "label")
 761                         ret = label_;
 762                 else if (key == "modifier" && modifier_ != 0)
 763                         ret = modifier_;
 764                 else if (key == "numericallabel")
 765                         ret = cite_number_;
 766                 else if (key == "shortauthor")
 767                         // When shortauthor is not defined, jurabib automatically
 768                         // provides jurabib-style abbreviated author names. We do
 769                         // this as well.
 770                         ret = getAbbreviatedAuthor(&buf, true);
 771                 else if (key == "shorttitle") {
 772                         // When shorttitle is not defined, jurabib uses for `article'
 773                         // and `periodical' entries the form `journal volume [year]'
 774                         // and for other types of entries it uses the `title' field.
 775                         if (entry_type_ == "article" || entry_type_ == "periodical")
 776                                 ret = operator[]("journal") + " " + operator[]("volume")
 777                                         + " [" + operator[]("year") + "]";
 778                         else
 779                                 ret = operator[]("title");
 780                 else if (key == "abbrvauthor") {
 781                         // Special key to provide abbreviated author names,
 782                         // with respect to maxcitenames.
 783                         ret = getAuthorList(&buf, false, false);
 784                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 785                                 ret[0] = uppercase(ret[0]);
 786                 } else if (key == "fullauthor") {
 787                         // Return a full author list
 788                         ret = getAuthorList(&buf, true, false);
 789                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 790                                 ret[0] = uppercase(ret[0]);
 791                 } else if (key == "forceabbrvauthor") {
 792                         // Special key to provide abbreviated author names,
 793                         // irrespective of maxcitenames.
 794                         ret = getAuthorList(&buf, false, true);
 795                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 796                                 ret[0] = uppercase(ret[0]);
 797                 } else if (key == "bibentry") {
 798                         // Special key to provide the full bibliography entry: see getInfo()
 799                         CiteEngineType const engine_type = buf.params().citeEngineType();
 800                         DocumentClass const & dc = buf.params().documentClass();
 801                         docstring const & format =
 802                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 803                         int counter = 0;
 804                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
 805                 } else if (key == "textbefore")
 806                         ret = ci.textBefore;
 807                 else if (key == "textafter")
 808                         ret = ci.textAfter;
 809                 else if (key == "year")
 810                         ret = getYear();
 811         }
 812
 813         if (cleanit)
 814                 ret = html::cleanAttr(ret);
 815
 816         // make sure it is not too big
 817         support::truncateWithEllipsis(ret, maxsize);
 818         return ret;
 819 }
 820
 821
 822 //////////////////////////////////////////////////////////////////////
 823 //
 824 // BiblioInfo
 825 //
 826 //////////////////////////////////////////////////////////////////////
 827
 828 namespace {
 829
 830 // A functor for use with sort, leading to case insensitive sorting
 831 class compareNoCase: public binary_function<docstring, docstring, bool>
 832 {
 833 public:
 834         bool operator()(docstring const & s1, docstring const & s2) const {
 835                 return compare_no_case(s1, s2) < 0;
 836         }
 837 };
 838
 839 } // namespace anon
 840
 841
 842 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
 843 {
 844         vector<docstring> result;
 845         if (!data.isBibTeX())
 846                 return result;
 847         // Legacy crossref field. This is not nestable.
 848         if (!nested && !data["crossref"].empty()) {
 849                 docstring const xrefkey = data["crossref"];
 850                 result.push_back(xrefkey);
 851                 // However, check for nested xdatas
 852                 BiblioInfo::const_iterator it = find(xrefkey);
 853                 if (it != end()) {
 854                         BibTeXInfo const & xref = it->second;
 855                         vector<docstring> const nxdata = getXRefs(xref, true);
 856                         if (!nxdata.empty())
 857                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
 858                 }
 859         }
 860         // Biblatex's xdata field. Infinitely nestable.
 861         // XData field can consist of a comma-separated list of keys
 862         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
 863         if (!xdatakeys.empty()) {
 864                 vector<docstring>::const_iterator xit = xdatakeys.begin();
 865                 vector<docstring>::const_iterator xen = xdatakeys.end();
 866                 for (; xit != xen; ++xit) {
 867                         docstring const xdatakey = *xit;
 868                         result.push_back(xdatakey);
 869                         BiblioInfo::const_iterator it = find(xdatakey);
 870                         if (it != end()) {
 871                                 BibTeXInfo const & xdata = it->second;
 872                                 vector<docstring> const nxdata = getXRefs(xdata, true);
 873                                 if (!nxdata.empty())
 874                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
 875                         }
 876                 }
 877         }
 878         return result;
 879 }
 880
 881
 882 vector<docstring> const BiblioInfo::getKeys() const
 883 {
 884         vector<docstring> bibkeys;
 885         BiblioInfo::const_iterator it  = begin();
 886         for (; it != end(); ++it)
 887                 bibkeys.push_back(it->first);
 888         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 889         return bibkeys;
 890 }
 891
 892
 893 vector<docstring> const BiblioInfo::getFields() const
 894 {
 895         vector<docstring> bibfields;
 896         set<docstring>::const_iterator it = field_names_.begin();
 897         set<docstring>::const_iterator end = field_names_.end();
 898         for (; it != end; ++it)
 899                 bibfields.push_back(*it);
 900         sort(bibfields.begin(), bibfields.end());
 901         return bibfields;
 902 }
 903
 904
 905 vector<docstring> const BiblioInfo::getEntries() const
 906 {
 907         vector<docstring> bibentries;
 908         set<docstring>::const_iterator it = entry_types_.begin();
 909         set<docstring>::const_iterator end = entry_types_.end();
 910         for (; it != end; ++it)
 911                 bibentries.push_back(*it);
 912         sort(bibentries.begin(), bibentries.end());
 913         return bibentries;
 914 }
 915
 916
 917 docstring const BiblioInfo::getAuthorList(docstring const & key, Buffer const & buf) const
 918 {
 919         BiblioInfo::const_iterator it = find(key);
 920         if (it == end())
 921                 return docstring();
 922         BibTeXInfo const & data = it->second;
 923         return data.getAuthorList(&buf, false);
 924 }
 925
 926
 927 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 928 {
 929         BiblioInfo::const_iterator it = find(key);
 930         if (it == end())
 931                 return docstring();
 932         BibTeXInfo const & data = it->second;
 933         return data.citeNumber();
 934 }
 935
 936
 937 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 938 {
 939         BiblioInfo::const_iterator it = find(key);
 940         if (it == end())
 941                 return docstring();
 942         BibTeXInfo const & data = it->second;
 943         docstring year = data.getYear();
 944         if (year.empty()) {
 945                 // let's try the crossrefs
 946                 vector<docstring> const xrefs = getXRefs(data);
 947                 if (xrefs.empty())
 948                         // no luck
 949                         return docstring();
 950                 vector<docstring>::const_iterator it = xrefs.begin();
 951                 vector<docstring>::const_iterator en = xrefs.end();
 952                 for (; it != en; ++it) {
 953                         BiblioInfo::const_iterator const xrefit = find(*it);
 954                         if (xrefit == end())
 955                                 continue;
 956                         BibTeXInfo const & xref_data = xrefit->second;
 957                         year = xref_data.getYear();
 958                         if (!year.empty())
 959                                 // success!
 960                                 break;
 961                 }
 962         }
 963         if (use_modifier && data.modifier() != 0)
 964                 year += data.modifier();
 965         return year;
 966 }
 967
 968
 969 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 970 {
 971         docstring const year = getYear(key, use_modifier);
 972         if (year.empty())
 973                 return buf.B_("No year");
 974         return year;
 975 }
 976
 977
 978 docstring const BiblioInfo::getInfo(docstring const & key,
 979         Buffer const & buf, CiteItem const & ci) const
 980 {
 981         BiblioInfo::const_iterator it = find(key);
 982         if (it == end())
 983                 return docstring(_("Bibliography entry not found!"));
 984         BibTeXInfo const & data = it->second;
 985         BibTeXInfoList xrefptrs;
 986         vector<docstring> const xrefs = getXRefs(data);
 987         if (!xrefs.empty()) {
 988                 vector<docstring>::const_iterator it = xrefs.begin();
 989                 vector<docstring>::const_iterator en = xrefs.end();
 990                 for (; it != en; ++it) {
 991                         BiblioInfo::const_iterator const xrefit = find(*it);
 992                         if (xrefit != end())
 993                                 xrefptrs.push_back(&(xrefit->second));
 994                 }
 995         }
 996         return data.getInfo(xrefptrs, buf, ci);
 997 }
 998
 999
1000 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1001         Buffer const & buf, string const & style, CiteItem const & ci) const
1002 {
1003         size_t max_size = ci.max_size;
1004         // shorter makes no sense
1005         LASSERT(max_size >= 16, max_size = 16);
1006
1007         // we can't display more than 10 of these, anyway
1008         bool const too_many_keys = keys.size() > 10;
1009         if (too_many_keys)
1010                 keys.resize(10);
1011
1012         CiteEngineType const engine_type = buf.params().citeEngineType();
1013         DocumentClass const & dc = buf.params().documentClass();
1014         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
1015         docstring ret = format;
1016         vector<docstring>::const_iterator key = keys.begin();
1017         vector<docstring>::const_iterator ken = keys.end();
1018         for (; key != ken; ++key) {
1019                 BiblioInfo::const_iterator it = find(*key);
1020                 BibTeXInfo empty_data;
1021                 empty_data.key(*key);
1022                 BibTeXInfo & data = empty_data;
1023                 vector<BibTeXInfo const *> xrefptrs;
1024                 if (it != end()) {
1025                         data = it->second;
1026                         vector<docstring> const xrefs = getXRefs(data);
1027                         if (!xrefs.empty()) {
1028                                 vector<docstring>::const_iterator it = xrefs.begin();
1029                                 vector<docstring>::const_iterator en = xrefs.end();
1030                                 for (; it != en; ++it) {
1031                                         BiblioInfo::const_iterator const xrefit = find(*it);
1032                                         if (xrefit != end())
1033                                                 xrefptrs.push_back(&(xrefit->second));
1034                                 }
1035                         }
1036                 }
1037                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1038         }
1039
1040         if (too_many_keys)
1041                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1042         support::truncateWithEllipsis(ret, max_size);
1043         return ret;
1044 }
1045
1046
1047 bool BiblioInfo::isBibtex(docstring const & key) const
1048 {
1049         docstring key1;
1050         split(key, key1, ',');
1051         BiblioInfo::const_iterator it = find(key1);
1052         if (it == end())
1053                 return false;
1054         return it->second.isBibTeX();
1055 }
1056
1057
1058 vector<docstring> const BiblioInfo::getCiteStrings(
1059         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1060         Buffer const & buf, CiteItem const & ci) const
1061 {
1062         if (empty())
1063                 return vector<docstring>();
1064
1065         string style;
1066         vector<docstring> vec(styles.size());
1067         for (size_t i = 0; i != vec.size(); ++i) {
1068                 style = styles[i].name;
1069                 vec[i] = getLabel(keys, buf, style, ci);
1070         }
1071
1072         return vec;
1073 }
1074
1075
1076 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1077 {
1078         bimap_.insert(info.begin(), info.end());
1079         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1080         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1081 }
1082
1083
1084 namespace {
1085
1086 // used in xhtml to sort a list of BibTeXInfo objects
1087 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1088 {
1089         docstring const lauth = lhs->getAuthorList();
1090         docstring const rauth = rhs->getAuthorList();
1091         docstring const lyear = lhs->getYear();
1092         docstring const ryear = rhs->getYear();
1093         docstring const ltitl = lhs->operator[]("title");
1094         docstring const rtitl = rhs->operator[]("title");
1095         return  (lauth < rauth)
1096                 || (lauth == rauth && lyear < ryear)
1097                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1098 }
1099
1100 }
1101
1102
1103 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1104 {
1105         cited_entries_.clear();
1106         // We are going to collect all the citation keys used in the document,
1107         // getting them from the TOC.
1108         // FIXME We may want to collect these differently, in the first case,
1109         // so that we might have them in order of appearance.
1110         set<docstring> citekeys;
1111         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1112         Toc::const_iterator it = toc->begin();
1113         Toc::const_iterator const en = toc->end();
1114         for (; it != en; ++it) {
1115                 if (it->str().empty())
1116                         continue;
1117                 vector<docstring> const keys = getVectorFromString(it->str());
1118                 citekeys.insert(keys.begin(), keys.end());
1119         }
1120         if (citekeys.empty())
1121                 return;
1122
1123         // We have a set of the keys used in this document.
1124         // We will now convert it to a list of the BibTeXInfo objects used in
1125         // this document...
1126         vector<BibTeXInfo const *> bi;
1127         set<docstring>::const_iterator cit = citekeys.begin();
1128         set<docstring>::const_iterator const cen = citekeys.end();
1129         for (; cit != cen; ++cit) {
1130                 BiblioInfo::const_iterator const bt = find(*cit);
1131                 if (bt == end() || !bt->second.isBibTeX())
1132                         continue;
1133                 bi.push_back(&(bt->second));
1134         }
1135         // ...and sort it.
1136         sort(bi.begin(), bi.end(), lSorter);
1137
1138         // Now we can write the sorted keys
1139         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1140         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1141         for (; bit != ben; ++bit)
1142                 cited_entries_.push_back((*bit)->key());
1143 }
1144
1145
1146 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1147 {
1148         collectCitedEntries(buf);
1149         CiteEngineType const engine_type = buf.params().citeEngineType();
1150         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1151
1152         int keynumber = 0;
1153         char modifier = 0;
1154         // used to remember the last one we saw
1155         // we'll be comparing entries to see if we need to add
1156         // modifiers, like "1984a"
1157         map<docstring, BibTeXInfo>::iterator last;
1158
1159         vector<docstring>::const_iterator it = cited_entries_.begin();
1160         vector<docstring>::const_iterator const en = cited_entries_.end();
1161         for (; it != en; ++it) {
1162                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1163                 // this shouldn't happen, but...
1164                 if (biit == bimap_.end())
1165                         // ...fail gracefully, anyway.
1166                         continue;
1167                 BibTeXInfo & entry = biit->second;
1168                 if (numbers) {
1169                         docstring const num = convert<docstring>(++keynumber);
1170                         entry.setCiteNumber(num);
1171                 } else {
1172                         // coverity complains about our derefercing the iterator last,
1173                         // which was not initialized above. but it does get initialized
1174                         // after the first time through the loop, which is the point of
1175                         // the first test.
1176                         // coverity[FORWARD_NULL]
1177                         if (it != cited_entries_.begin()
1178                             && entry.getAuthorList() == last->second.getAuthorList()
1179                             // we access the year via getYear() so as to get it from the xref,
1180                             // if we need to do so
1181                             && getYear(entry.key()) == getYear(last->second.key())) {
1182                                 if (modifier == 0) {
1183                                         // so the last one should have been 'a'
1184                                         last->second.setModifier('a');
1185                                         modifier = 'b';
1186                                 } else if (modifier == 'z')
1187                                         modifier = 'A';
1188                                 else
1189                                         modifier++;
1190                         } else {
1191                                 modifier = 0;
1192                         }
1193                         entry.setModifier(modifier);
1194                         // remember the last one
1195                         last = biit;
1196                 }
1197         }
1198         // Set the labels
1199         it = cited_entries_.begin();
1200         for (; it != en; ++it) {
1201                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1202                 // this shouldn't happen, but...
1203                 if (biit == bimap_.end())
1204                         // ...fail gracefully, anyway.
1205                         continue;
1206                 BibTeXInfo & entry = biit->second;
1207                 if (numbers) {
1208                         entry.label(entry.citeNumber());
1209                 } else {
1210                         docstring const auth = entry.getAuthorList(&buf, false);
1211                         // we do it this way so as to access the xref, if necessary
1212                         // note that this also gives us the modifier
1213                         docstring const year = getYear(*it, buf, true);
1214                         if (!auth.empty() && !year.empty())
1215                                 entry.label(auth + ' ' + year);
1216                         else
1217                                 entry.label(entry.key());
1218                 }
1219         }
1220 }
1221
1222
1223 //////////////////////////////////////////////////////////////////////
1224 //
1225 // CitationStyle
1226 //
1227 //////////////////////////////////////////////////////////////////////
1228
1229
1230 CitationStyle citationStyleFromString(string const & command,
1231                                       BufferParams const & params)
1232 {
1233         CitationStyle cs;
1234         if (command.empty())
1235                 return cs;
1236
1237         string const alias = params.getCiteAlias(command);
1238         string cmd = alias.empty() ? command : alias;
1239         if (isUpperCase(command[0])) {
1240                 cs.forceUpperCase = true;
1241                 cmd[0] = lowercase(cmd[0]);
1242         }
1243
1244         size_t const n = command.size() - 1;
1245         if (command[n] == '*') {
1246                 cs.hasStarredVersion = true;
1247                 if (suffixIs(cmd, '*'))
1248                         cmd = cmd.substr(0, cmd.size() - 1);
1249         }
1250
1251         cs.name = cmd;
1252         return cs;
1253 }
1254
1255
1256 string citationStyleToString(const CitationStyle & cs, bool const latex)
1257 {
1258         string cmd = latex ? cs.cmd : cs.name;
1259         if (cs.forceUpperCase)
1260                 cmd[0] = uppercase(cmd[0]);
1261         if (cs.hasStarredVersion)
1262                 cmd += '*';
1263         return cmd;
1264 }
1265
1266 } // namespace lyx