src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Encoding.h"
  21 #include "InsetIterator.h"
  22 #include "Language.h"
  23 #include "output_xhtml.h"
  24 #include "Paragraph.h"
  25 #include "TextClass.h"
  26 #include "TocBackend.h"
  27
  28 #include "support/convert.h"
  29 #include "support/debug.h"
  30 #include "support/docstream.h"
  31 #include "support/gettext.h"
  32 #include "support/lassert.h"
  33 #include "support/lstrings.h"
  34 #include "support/regex.h"
  35 #include "support/textutils.h"
  36
  37 #include <set>
  38
  39 using namespace std;
  40 using namespace lyx::support;
  41
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 // gets the "family name" from an author-type string
  48 docstring familyName(docstring const & name)
  49 {
  50         if (name.empty())
  51                 return docstring();
  52
  53         // first we look for a comma, and take the last name to be everything
  54         // preceding the right-most one, so that we also get the "jr" part.
  55         docstring::size_type idx = name.rfind(',');
  56         if (idx != docstring::npos)
  57                 return ltrim(name.substr(0, idx));
  58
  59         // OK, so now we want to look for the last name. We're going to
  60         // include the "von" part. This isn't perfect.
  61         // Split on spaces, to get various tokens.
  62         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  63         // If we only get two, assume the last one is the last name
  64         if (pieces.size() <= 2)
  65                 return pieces.back();
  66
  67         // Now we look for the first token that begins with a lower case letter.
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator en = pieces.end();
  70         for (; it != en; ++it) {
  71                 if ((*it).empty())
  72                         continue;
  73                 char_type const c = (*it)[0];
  74                 if (isLower(c))
  75                         break;
  76         }
  77
  78         if (it == en) // we never found a "von"
  79                 return pieces.back();
  80
  81         // reconstruct what we need to return
  82         docstring retval;
  83         bool first = true;
  84         for (; it != en; ++it) {
  85                 if (!first)
  86                         retval += " ";
  87                 else
  88                         first = false;
  89                 retval += *it;
  90         }
  91         return retval;
  92 }
  93
  94
  95 // converts a string containing LaTeX commands into unicode
  96 // for display.
  97 docstring convertLaTeXCommands(docstring const & str)
  98 {
  99         docstring val = str;
 100         docstring ret;
 101
 102         bool scanning_cmd = false;
 103         bool scanning_math = false;
 104         bool escaped = false; // used to catch \$, etc.
 105         while (!val.empty()) {
 106                 char_type const ch = val[0];
 107
 108                 // if we're scanning math, we output everything until we
 109                 // find an unescaped $, at which point we break out.
 110                 if (scanning_math) {
 111                         if (escaped)
 112                                 escaped = false;
 113                         else if (ch == '\\')
 114                                 escaped = true;
 115                         else if (ch == '$')
 116                                 scanning_math = false;
 117                         ret += ch;
 118                         val = val.substr(1);
 119                         continue;
 120                 }
 121
 122                 // if we're scanning a command name, then we just
 123                 // discard characters until we hit something that
 124                 // isn't alpha.
 125                 if (scanning_cmd) {
 126                         if (isAlphaASCII(ch)) {
 127                                 val = val.substr(1);
 128                                 escaped = false;
 129                                 continue;
 130                         }
 131                         // so we're done with this command.
 132                         // now we fall through and check this character.
 133                         scanning_cmd = false;
 134                 }
 135
 136                 // was the last character a \? If so, then this is something like:
 137                 // \\ or \$, so we'll just output it. That's probably not always right...
 138                 if (escaped) {
 139                         // exception: output \, as THIN SPACE
 140                         if (ch == ',')
 141                                 ret.push_back(0x2009);
 142                         else
 143                                 ret += ch;
 144                         val = val.substr(1);
 145                         escaped = false;
 146                         continue;
 147                 }
 148
 149                 if (ch == '$') {
 150                         ret += ch;
 151                         val = val.substr(1);
 152                         scanning_math = true;
 153                         continue;
 154                 }
 155
 156                 // we just ignore braces
 157                 if (ch == '{' || ch == '}') {
 158                         val = val.substr(1);
 159                         continue;
 160                 }
 161
 162                 // we're going to check things that look like commands, so if
 163                 // this doesn't, just output it.
 164                 if (ch != '\\') {
 165                         ret += ch;
 166                         val = val.substr(1);
 167                         continue;
 168                 }
 169
 170                 // ok, could be a command of some sort
 171                 // let's see if it corresponds to some unicode
 172                 // unicodesymbols has things in the form: \"{u},
 173                 // whereas we may see things like: \"u. So we'll
 174                 // look for that and change it, if necessary.
 175                 // FIXME: This is a sort of mini-tex2lyx.
 176                 //        Use the real tex2lyx instead!
 177                 static lyx::regex const reg("^\\\\\\W\\w");
 178                 if (lyx::regex_search(to_utf8(val), reg)) {
 179                         val.insert(3, from_ascii("}"));
 180                         val.insert(2, from_ascii("{"));
 181                 }
 182                 bool termination;
 183                 docstring rem;
 184                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 185                                 Encodings::TEXT_CMD, termination, rem);
 186                 if (!cnvtd.empty()) {
 187                         // it did, so we'll take that bit and proceed with what's left
 188                         ret += cnvtd;
 189                         val = rem;
 190                         continue;
 191                 }
 192                 // it's a command of some sort
 193                 scanning_cmd = true;
 194                 escaped = true;
 195                 val = val.substr(1);
 196         }
 197         return ret;
 198 }
 199
 200
 201 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 202 docstring processRichtext(docstring const & str, bool richtext)
 203 {
 204         docstring val = str;
 205         docstring ret;
 206
 207         bool scanning_rich = false;
 208         while (!val.empty()) {
 209                 char_type const ch = val[0];
 210                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 211                         // beginning of rich text
 212                         scanning_rich = true;
 213                         val = val.substr(2);
 214                         continue;
 215                 }
 216                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 217                         // end of rich text
 218                         scanning_rich = false;
 219                         val = val.substr(2);
 220                         continue;
 221                 }
 222                 if (richtext) {
 223                         if (scanning_rich)
 224                                 ret += ch;
 225                         else {
 226                                 // we need to escape '<' and '>'
 227                                 if (ch == '<')
 228                                         ret += "&lt;";
 229                                 else if (ch == '>')
 230                                         ret += "&gt;";
 231                                 else
 232                                         ret += ch;
 233                         }
 234                 } else if (!scanning_rich /* && !richtext */)
 235                         ret += ch;
 236                 // else the character is discarded, which will happen only if
 237                 // richtext == false and we are scanning rich text
 238                 val = val.substr(1);
 239         }
 240         return ret;
 241 }
 242
 243 } // anon namespace
 244
 245
 246 //////////////////////////////////////////////////////////////////////
 247 //
 248 // BibTeXInfo
 249 //
 250 //////////////////////////////////////////////////////////////////////
 251
 252 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 253         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 254           modifier_(0)
 255 {}
 256
 257
 258 docstring const BibTeXInfo::getAbbreviatedAuthor(
 259     Buffer const * buf, bool jurabib_style) const
 260 {
 261         if (!is_bibtex_) {
 262                 docstring const opt = label();
 263                 if (opt.empty())
 264                         return docstring();
 265
 266                 docstring authors;
 267                 docstring const remainder = trim(split(opt, authors, '('));
 268                 if (remainder.empty())
 269                         // in this case, we didn't find a "(",
 270                         // so we don't have author (year)
 271                         return docstring();
 272                 return authors;
 273         }
 274
 275         docstring author = operator[]("author");
 276         if (author.empty()) {
 277                 author = operator[]("editor");
 278                 if (author.empty())
 279                         return author;
 280         }
 281
 282         // FIXME Move this to a separate routine that can
 283         // be called from elsewhere.
 284         //
 285         // OK, we've got some names. Let's format them.
 286         // Try to split the author list on " and "
 287         vector<docstring> const authors =
 288                 getVectorFromString(author, from_ascii(" and "));
 289
 290         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 291                 docstring shortauthor = familyName(authors[0])
 292                         + "/" + familyName(authors[1]);
 293                 if (authors.size() == 3)
 294                         shortauthor += "/" + familyName(authors[2]);
 295                 return convertLaTeXCommands(shortauthor);
 296         }
 297
 298         docstring retval = familyName(authors[0]);
 299
 300         if (authors.size() == 2 && authors[1] != "others") {
 301                 docstring const dformat = buf ?
 302                         buf->B_("%1$s and %2$s") : from_ascii("%1$s and %2$s");
 303                 retval = bformat(dformat, familyName(authors[0]), familyName(authors[1]));
 304         } else if (authors.size() >= 2) {
 305                 // we get here either if the author list is longer than two names
 306                 // or if the second 'name' is "others". we do the same thing either
 307                 // way.
 308                 docstring const dformat = buf ?
 309                         buf->B_("%1$s et al.") : from_ascii("%1$s et al.");
 310                 retval = bformat(dformat, familyName(authors[0]));
 311         }
 312
 313         return convertLaTeXCommands(retval);
 314 }
 315
 316
 317 docstring const BibTeXInfo::getYear() const
 318 {
 319         if (is_bibtex_) {
 320                 // first try legacy year field
 321                 docstring year = operator[]("year");
 322                 if (!year.empty())
 323                         return year;
 324                 // now try biblatex's date field
 325                 year = operator[]("date");
 326                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 327                 // We only want the years.
 328                 static regex const yreg("[-]?([\\d]{4}).*");
 329                 static regex const ereg(".*/([\\d]{4}).*");
 330                 smatch sm;
 331                 string const date = to_utf8(year);
 332                 regex_match(date, sm, yreg);
 333                 year = from_ascii(sm[1]);
 334                 // check for an endyear
 335                 if (regex_match(date, sm, ereg))
 336                         year += char_type(0x2013) + from_ascii(sm[1]);
 337                 return year;
 338         }
 339
 340         docstring const opt = label();
 341         if (opt.empty())
 342                 return docstring();
 343
 344         docstring authors;
 345         docstring tmp = split(opt, authors, '(');
 346         if (tmp.empty())
 347                 // we don't have author (year)
 348                 return docstring();
 349         docstring year;
 350         tmp = split(tmp, year, ')');
 351         return year;
 352 }
 353
 354
 355 docstring const BibTeXInfo::getXRef() const
 356 {
 357         if (!is_bibtex_)
 358                 return docstring();
 359         return operator[]("crossref");
 360 }
 361
 362
 363 namespace {
 364
// Forward declaration. parseEmbeddedOption() calls parseOptions(),
// parseOptions() calls getClause(), and getClause() in turn calls
// parseEmbeddedOption(), so this has to be declared before its definition.
docstring parseOptions(docstring const & format, string & optkey,
                    docstring & ifpart, docstring & elsepart);
 367
 368 // Calls parseOptions to deal with an embedded option, such as:
 369 //   {%number%[[, no.~%number%]]}
 370 // which must appear at the start of format. ifelsepart gets the
 371 // whole of the option, and we return what's left after the option.
 372 // we return format if there is an error.
 373 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 374 {
 375         LASSERT(format[0] == '{' && format[1] == '%', return format);
 376         string optkey;
 377         docstring ifpart;
 378         docstring elsepart;
 379         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 380         if (format == rest) { // parse error
 381                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 382                 return format;
 383         }
 384         LASSERT(rest.size() <= format.size(),
 385                 { ifelsepart = docstring(); return format; });
 386         ifelsepart = format.substr(0, format.size() - rest.size());
 387         return rest;
 388 }
 389
 390
 391 // Gets a "clause" from a format string, where the clause is
 392 // delimited by '[[' and ']]'. Returns what is left after the
 393 // clause is removed, and returns format if there is an error.
 394 docstring getClause(docstring const & format, docstring & clause)
 395 {
 396         docstring fmt = format;
 397         // remove '[['
 398         fmt = fmt.substr(2);
 399         // we'll remove characters from the front of fmt as we
 400         // deal with them
 401         while (!fmt.empty()) {
 402                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 403                         // that's the end
 404                         fmt = fmt.substr(2);
 405                         break;
 406                 }
 407                 // check for an embedded option
 408                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 409                         docstring part;
 410                         docstring const rest = parseEmbeddedOption(fmt, part);
 411                         if (fmt == rest) {
 412                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 413                                 return format;
 414                         }
 415                         clause += part;
 416                         fmt = rest;
 417                 } else { // it's just a normal character
 418                                 clause += fmt[0];
 419                                 fmt = fmt.substr(1);
 420                 }
 421         }
 422         return fmt;
 423 }
 424
 425
 426 // parse an options string, which must appear at the start of the
 427 // format parameter. puts the parsed bits in optkey, ifpart, and
 428 // elsepart and returns what's left after the option is removed.
 429 // if there's an error, it returns format itself.
 430 docstring parseOptions(docstring const & format, string & optkey,
 431                     docstring & ifpart, docstring & elsepart)
 432 {
 433         LASSERT(format[0] == '{' && format[1] == '%', return format);
 434         // strip '{%'
 435         docstring fmt = format.substr(2);
 436         size_t pos = fmt.find('%'); // end of key
 437         if (pos == string::npos) {
 438                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 439                 return format;
 440         }
 441         optkey = to_utf8(fmt.substr(0, pos));
 442         fmt = fmt.substr(pos + 1);
 443         // [[format]] should be next
 444         if (fmt[0] != '[' || fmt[1] != '[') {
 445                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 446                 return format;
 447         }
 448
 449         docstring curfmt = fmt;
 450         fmt = getClause(curfmt, ifpart);
 451         if (fmt == curfmt) {
 452                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 453                 return format;
 454         }
 455
 456         if (fmt[0] == '}') // we're done, no else clause
 457                 return fmt.substr(1);
 458
 459         // else part should follow
 460         if (fmt[0] != '[' || fmt[1] != '[') {
 461                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 462                 return format;
 463         }
 464
 465         curfmt = fmt;
 466         fmt = getClause(curfmt, elsepart);
 467         // we should be done
 468         if (fmt == curfmt || fmt[0] != '}') {
 469                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 470                 return format;
 471         }
 472         return fmt.substr(1);
 473 }
 474
 475
 476 } // anon namespace
 477
 478 /* FIXME
 479 Bug #9131 revealed an oddity in how we are generating citation information
 480 when more than one key is given. We end up building a longer and longer format
 481 string as we go, which we then have to re-parse, over and over and over again,
 482 rather than generating the information for the individual keys and then putting
 483 all of that together. We do that to deal with the way separators work, from what
 484 I can tell, but it still feels like a hack. Fixing this would require quite a
 485 bit of work, however.
 486 */
 487 docstring BibTeXInfo::expandFormat(docstring const & format,
 488                 BibTeXInfo const * const xref, int & counter, Buffer const & buf,
 489                 docstring before, docstring after, docstring dialog, bool next) const
 490 {
 491         // incorrect use of macros could put us in an infinite loop
 492         static int const max_passes = 5000;
 493         // the use of overly large keys can lead to performance problems, due
 494         // to eventual attempts to convert LaTeX macros to unicode. See bug
 495         // #8944. This is perhaps not the best solution, but it will have to
 496         // do for now.
 497         static size_t const max_keysize = 128;
 498         odocstringstream ret; // return value
 499         string key;
 500         bool scanning_key = false;
 501         bool scanning_rich = false;
 502
 503         CiteEngineType const engine_type = buf.params().citeEngineType();
 504         docstring fmt = format;
 505         // we'll remove characters from the front of fmt as we
 506         // deal with them
 507         while (!fmt.empty()) {
 508                 if (counter > max_passes) {
 509                         LYXERR0("Recursion limit reached while parsing `"
 510                                 << format << "'.");
 511                         return _("ERROR!");
 512                 }
 513
 514                 char_type thischar = fmt[0];
 515                 if (thischar == '%') {
 516                         // beginning or end of key
 517                         if (scanning_key) {
 518                                 // end of key
 519                                 scanning_key = false;
 520                                 // so we replace the key with its value, which may be empty
 521                                 if (key[0] == '!') {
 522                                         // macro
 523                                         string const val =
 524                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 525                                         fmt = from_utf8(val) + fmt.substr(1);
 526                                         counter += 1;
 527                                         continue;
 528                                 } else if (key[0] == '_') {
 529                                         // a translatable bit
 530                                         string const val =
 531                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 532                                         docstring const trans =
 533                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 534                                         ret << trans;
 535                                 } else {
 536                                         docstring const val =
 537                                                 getValueForKey(key, buf, before, after, dialog, xref, max_keysize);
 538                                         if (!scanning_rich)
 539                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 540                                         ret << val;
 541                                         if (!scanning_rich)
 542                                                 ret << from_ascii("{!</span>!}");
 543                                 }
 544                         } else {
 545                                 // beginning of key
 546                                 key.clear();
 547                                 scanning_key = true;
 548                         }
 549                 }
 550                 else if (thischar == '{') {
 551                         // beginning of option?
 552                         if (scanning_key) {
 553                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 554                                 return _("ERROR!");
 555                         }
 556                         if (fmt.size() > 1) {
 557                                 if (fmt[1] == '%') {
 558                                         // it is the beginning of an optional format
 559                                         string optkey;
 560                                         docstring ifpart;
 561                                         docstring elsepart;
 562                                         docstring const newfmt =
 563                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 564                                         if (newfmt == fmt) // parse error
 565                                                 return _("ERROR!");
 566                                         fmt = newfmt;
 567                                         docstring const val =
 568                                                 getValueForKey(optkey, buf, before, after, dialog, xref);
 569                                         if (optkey == "next" && next)
 570                                                 ret << ifpart; // without expansion
 571                                         else if (!val.empty()) {
 572                                                 int newcounter = 0;
 573                                                 ret << expandFormat(ifpart, xref, newcounter, buf,
 574                                                         before, after, dialog, next);
 575                                         } else if (!elsepart.empty()) {
 576                                                 int newcounter = 0;
 577                                                 ret << expandFormat(elsepart, xref, newcounter, buf,
 578                                                         before, after, dialog, next);
 579                                         }
 580                                         // fmt will have been shortened for us already
 581                                         continue;
 582                                 }
 583                                 if (fmt[1] == '!') {
 584                                         // beginning of rich text
 585                                         scanning_rich = true;
 586                                         fmt = fmt.substr(2);
 587                                         ret << from_ascii("{!");
 588                                         continue;
 589                                 }
 590                         }
 591                         // we are here if '{' was not followed by % or !.
 592                         // So it's just a character.
 593                         ret << thischar;
 594                 }
 595                 else if (scanning_rich && thischar == '!'
 596                          && fmt.size() > 1 && fmt[1] == '}') {
 597                         // end of rich text
 598                         scanning_rich = false;
 599                         fmt = fmt.substr(2);
 600                         ret << from_ascii("!}");
 601                         continue;
 602                 }
 603                 else if (scanning_key)
 604                         key += char(thischar);
 605                 else {
 606                         try {
 607                                 ret.put(thischar);
 608                         } catch (EncodingException & /* e */) {
 609                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 610                         }
 611                 }
 612                 fmt = fmt.substr(1);
 613         } // for loop
 614         if (scanning_key) {
 615                 LYXERR0("Never found end of key in `" << format << "'!");
 616                 return _("ERROR!");
 617         }
 618         if (scanning_rich) {
 619                 LYXERR0("Never found end of rich text in `" << format << "'!");
 620                 return _("ERROR!");
 621         }
 622         return ret.str();
 623 }
 624
 625
 626 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref,
 627         Buffer const & buf, bool richtext) const
 628 {
 629         if (!richtext && !info_.empty())
 630                 return info_;
 631         if (richtext && !info_richtext_.empty())
 632                 return info_richtext_;
 633
 634         if (!is_bibtex_) {
 635                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 636                 info_ = it->second;
 637                 return info_;
 638         }
 639
 640         CiteEngineType const engine_type = buf.params().citeEngineType();
 641         DocumentClass const & dc = buf.params().documentClass();
 642         docstring const & format =
 643                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 644         int counter = 0;
 645         info_ = expandFormat(format, xref, counter, buf,
 646                 docstring(), docstring(), docstring(), false);
 647
 648         if (info_.empty()) {
 649                 // this probably shouldn't happen
 650                 return info_;
 651         }
 652
 653         if (richtext) {
 654                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 655                 return info_richtext_;
 656         }
 657
 658         info_ = convertLaTeXCommands(processRichtext(info_, false));
 659         return info_;
 660 }
 661
 662
 663 docstring const BibTeXInfo::getLabel(BibTeXInfo const * const xref,
 664         Buffer const & buf, docstring const & format, bool richtext,
 665         docstring const & before, docstring const & after,
 666         docstring const & dialog, bool next) const
 667 {
 668         docstring loclabel;
 669
 670         int counter = 0;
 671         loclabel = expandFormat(format, xref, counter, buf,
 672                 before, after, dialog, next);
 673
 674         if (!loclabel.empty() && !next) {
 675                 loclabel = processRichtext(loclabel, richtext);
 676                 loclabel = convertLaTeXCommands(loclabel);
 677         }
 678
 679         return loclabel;
 680 }
 681
 682
 683 docstring const & BibTeXInfo::operator[](docstring const & field) const
 684 {
 685         BibTeXInfo::const_iterator it = find(field);
 686         if (it != end())
 687                 return it->second;
 688         static docstring const empty_value = docstring();
 689         return empty_value;
 690 }
 691
 692
 693 docstring const & BibTeXInfo::operator[](string const & field) const
 694 {
 695         return operator[](from_ascii(field));
 696 }
 697
 698
 699 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 700         docstring const & before, docstring const & after, docstring const & dialog,
 701         BibTeXInfo const * const xref, size_t maxsize) const
 702 {
 703         // anything less is pointless
 704         LASSERT(maxsize >= 16, maxsize = 16);
 705         string key = oldkey;
 706         bool cleanit = false;
 707         if (prefixIs(oldkey, "clean:")) {
 708                 key = oldkey.substr(6);
 709                 cleanit = true;
 710         }
 711
 712         docstring ret = operator[](key);
 713         if (ret.empty() && xref)
 714                 ret = (*xref)[key];
 715         if (ret.empty()) {
 716                 // some special keys
 717                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 718                 if (key == "dialog")
 719                         ret = dialog;
 720                 else if (key == "entrytype")
 721                         ret = entry_type_;
 722                 else if (key == "key")
 723                         ret = bib_key_;
 724                 else if (key == "label")
 725                         ret = label_;
 726                 else if (key == "modifier" && modifier_ != 0)
 727                         ret = modifier_;
 728                 else if (key == "numericallabel")
 729                         ret = cite_number_;
 730                 else if (key == "abbrvauthor")
 731                         // Special key to provide abbreviated author names.
 732                         ret = getAbbreviatedAuthor(&buf, false);
 733                 else if (key == "shortauthor")
 734                         // When shortauthor is not defined, jurabib automatically
 735                         // provides jurabib-style abbreviated author names. We do
 736                         // this as well.
 737                         ret = getAbbreviatedAuthor(&buf, true);
 738                 else if (key == "shorttitle") {
 739                         // When shorttitle is not defined, jurabib uses for `article'
 740                         // and `periodical' entries the form `journal volume [year]'
 741                         // and for other types of entries it uses the `title' field.
 742                         if (entry_type_ == "article" || entry_type_ == "periodical")
 743                                 ret = operator[]("journal") + " " + operator[]("volume")
 744                                         + " [" + operator[]("year") + "]";
 745                         else
 746                                 ret = operator[]("title");
 747                 } else if (key == "bibentry") {
 748                         // Special key to provide the full bibliography entry: see getInfo()
 749                         CiteEngineType const engine_type = buf.params().citeEngineType();
 750                         DocumentClass const & dc = buf.params().documentClass();
 751                         docstring const & format =
 752                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 753                         int counter = 0;
 754                         ret = expandFormat(format, xref, counter, buf,
 755                                 docstring(), docstring(), docstring(), false);
 756                 } else if (key == "textbefore")
 757                         ret = before;
 758                 else if (key == "textafter")
 759                         ret = after;
 760                 else if (key == "year")
 761                         ret = getYear();
 762         }
 763
 764         if (cleanit)
 765                 ret = html::cleanAttr(ret);
 766
 767         // make sure it is not too big
 768         support::truncateWithEllipsis(ret, maxsize);
 769         return ret;
 770 }
 771
 772
 773 //////////////////////////////////////////////////////////////////////
 774 //
 775 // BiblioInfo
 776 //
 777 //////////////////////////////////////////////////////////////////////
 778
 779 namespace {
 780
 781 // A functor for use with sort, leading to case insensitive sorting
 782 class compareNoCase: public binary_function<docstring, docstring, bool>
 783 {
 784 public:
 785         bool operator()(docstring const & s1, docstring const & s2) const {
 786                 return compare_no_case(s1, s2) < 0;
 787         }
 788 };
 789
 790 } // namespace anon
 791
 792
 793 vector<docstring> const BiblioInfo::getKeys() const
 794 {
 795         vector<docstring> bibkeys;
 796         BiblioInfo::const_iterator it  = begin();
 797         for (; it != end(); ++it)
 798                 bibkeys.push_back(it->first);
 799         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 800         return bibkeys;
 801 }
 802
 803
 804 vector<docstring> const BiblioInfo::getFields() const
 805 {
 806         vector<docstring> bibfields;
 807         set<docstring>::const_iterator it = field_names_.begin();
 808         set<docstring>::const_iterator end = field_names_.end();
 809         for (; it != end; ++it)
 810                 bibfields.push_back(*it);
 811         sort(bibfields.begin(), bibfields.end());
 812         return bibfields;
 813 }
 814
 815
 816 vector<docstring> const BiblioInfo::getEntries() const
 817 {
 818         vector<docstring> bibentries;
 819         set<docstring>::const_iterator it = entry_types_.begin();
 820         set<docstring>::const_iterator end = entry_types_.end();
 821         for (; it != end; ++it)
 822                 bibentries.push_back(*it);
 823         sort(bibentries.begin(), bibentries.end());
 824         return bibentries;
 825 }
 826
 827
 828 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 829 {
 830         BiblioInfo::const_iterator it = find(key);
 831         if (it == end())
 832                 return docstring();
 833         BibTeXInfo const & data = it->second;
 834         return data.getAbbreviatedAuthor(&buf, false);
 835 }
 836
 837
 838 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 839 {
 840         BiblioInfo::const_iterator it = find(key);
 841         if (it == end())
 842                 return docstring();
 843         BibTeXInfo const & data = it->second;
 844         return data.citeNumber();
 845 }
 846
 847
 848 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 849 {
 850         BiblioInfo::const_iterator it = find(key);
 851         if (it == end())
 852                 return docstring();
 853         BibTeXInfo const & data = it->second;
 854         docstring year = data.getYear();
 855         if (year.empty()) {
 856                 // let's try the crossref
 857                 docstring const xref = data.getXRef();
 858                 if (xref.empty())
 859                         // no luck
 860                         return docstring();
 861                 BiblioInfo::const_iterator const xrefit = find(xref);
 862                 if (xrefit == end())
 863                         // no luck again
 864                         return docstring();
 865                 BibTeXInfo const & xref_data = xrefit->second;
 866                 year = xref_data.getYear();
 867         }
 868         if (use_modifier && data.modifier() != 0)
 869                 year += data.modifier();
 870         return year;
 871 }
 872
 873
 874 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 875 {
 876         docstring const year = getYear(key, use_modifier);
 877         if (year.empty())
 878                 return buf.B_("No year");
 879         return year;
 880 }
 881
 882
 883 docstring const BiblioInfo::getInfo(docstring const & key,
 884         Buffer const & buf, bool richtext) const
 885 {
 886         BiblioInfo::const_iterator it = find(key);
 887         if (it == end())
 888                 return docstring(_("Bibliography entry not found!"));
 889         BibTeXInfo const & data = it->second;
 890         BibTeXInfo const * xrefptr = 0;
 891         docstring const xref = data.getXRef();
 892         if (!xref.empty()) {
 893                 BiblioInfo::const_iterator const xrefit = find(xref);
 894                 if (xrefit != end())
 895                         xrefptr = &(xrefit->second);
 896         }
 897         return data.getInfo(xrefptr, buf, richtext);
 898 }
 899
 900
 901 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 902         Buffer const & buf, string const & style, bool for_xhtml,
 903         size_t max_size, docstring const & before, docstring const & after,
 904         docstring const & dialog) const
 905 {
 906         // shorter makes no sense
 907         LASSERT(max_size >= 16, max_size = 16);
 908
 909         // we can't display more than 10 of these, anyway
 910         bool const too_many_keys = keys.size() > 10;
 911         if (too_many_keys)
 912                 keys.resize(10);
 913
 914         CiteEngineType const engine_type = buf.params().citeEngineType();
 915         DocumentClass const & dc = buf.params().documentClass();
 916         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 917         docstring ret = format;
 918         vector<docstring>::const_iterator key = keys.begin();
 919         vector<docstring>::const_iterator ken = keys.end();
 920         for (; key != ken; ++key) {
 921                 BiblioInfo::const_iterator it = find(*key);
 922                 BibTeXInfo empty_data;
 923                 empty_data.key(*key);
 924                 BibTeXInfo & data = empty_data;
 925                 BibTeXInfo const * xrefptr = 0;
 926                 if (it != end()) {
 927                         data = it->second;
 928                         docstring const xref = data.getXRef();
 929                         if (!xref.empty()) {
 930                                 BiblioInfo::const_iterator const xrefit = find(xref);
 931                                 if (xrefit != end())
 932                                         xrefptr = &(xrefit->second);
 933                         }
 934                 }
 935                 ret = data.getLabel(xrefptr, buf, ret, for_xhtml,
 936                         before, after, dialog, key + 1 != ken);
 937         }
 938
 939         if (too_many_keys)
 940                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
 941         support::truncateWithEllipsis(ret, max_size);
 942         return ret;
 943 }
 944
 945
 946 bool BiblioInfo::isBibtex(docstring const & key) const
 947 {
 948         docstring key1;
 949         split(key, key1, ',');
 950         BiblioInfo::const_iterator it = find(key1);
 951         if (it == end())
 952                 return false;
 953         return it->second.isBibTeX();
 954 }
 955
 956
 957 vector<docstring> const BiblioInfo::getCiteStrings(
 958         vector<docstring> const & keys, vector<CitationStyle> const & styles,
 959         Buffer const & buf, docstring const & before,
 960         docstring const & after, docstring const & dialog, size_t max_size) const
 961 {
 962         if (empty())
 963                 return vector<docstring>();
 964
 965         string style;
 966         vector<docstring> vec(styles.size());
 967         for (size_t i = 0; i != vec.size(); ++i) {
 968                 style = styles[i].cmd;
 969                 vec[i] = getLabel(keys, buf, style, false, max_size, before, after, dialog);
 970         }
 971
 972         return vec;
 973 }
 974
 975
 976 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 977 {
 978         bimap_.insert(info.begin(), info.end());
 979         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
 980         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
 981 }
 982
 983
 984 namespace {
 985
 986 // used in xhtml to sort a list of BibTeXInfo objects
 987 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
 988 {
 989         docstring const lauth = lhs->getAbbreviatedAuthor();
 990         docstring const rauth = rhs->getAbbreviatedAuthor();
 991         docstring const lyear = lhs->getYear();
 992         docstring const ryear = rhs->getYear();
 993         docstring const ltitl = lhs->operator[]("title");
 994         docstring const rtitl = rhs->operator[]("title");
 995         return  (lauth < rauth)
 996                 || (lauth == rauth && lyear < ryear)
 997                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
 998 }
 999
1000 }
1001
1002
1003 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1004 {
1005         cited_entries_.clear();
1006         // We are going to collect all the citation keys used in the document,
1007         // getting them from the TOC.
1008         // FIXME We may want to collect these differently, in the first case,
1009         // so that we might have them in order of appearance.
1010         set<docstring> citekeys;
1011         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1012         Toc::const_iterator it = toc->begin();
1013         Toc::const_iterator const en = toc->end();
1014         for (; it != en; ++it) {
1015                 if (it->str().empty())
1016                         continue;
1017                 vector<docstring> const keys = getVectorFromString(it->str());
1018                 citekeys.insert(keys.begin(), keys.end());
1019         }
1020         if (citekeys.empty())
1021                 return;
1022
1023         // We have a set of the keys used in this document.
1024         // We will now convert it to a list of the BibTeXInfo objects used in
1025         // this document...
1026         vector<BibTeXInfo const *> bi;
1027         set<docstring>::const_iterator cit = citekeys.begin();
1028         set<docstring>::const_iterator const cen = citekeys.end();
1029         for (; cit != cen; ++cit) {
1030                 BiblioInfo::const_iterator const bt = find(*cit);
1031                 if (bt == end() || !bt->second.isBibTeX())
1032                         continue;
1033                 bi.push_back(&(bt->second));
1034         }
1035         // ...and sort it.
1036         sort(bi.begin(), bi.end(), lSorter);
1037
1038         // Now we can write the sorted keys
1039         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1040         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1041         for (; bit != ben; ++bit)
1042                 cited_entries_.push_back((*bit)->key());
1043 }
1044
1045
1046 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1047 {
1048         collectCitedEntries(buf);
1049         CiteEngineType const engine_type = buf.params().citeEngineType();
1050         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1051
1052         int keynumber = 0;
1053         char modifier = 0;
1054         // used to remember the last one we saw
1055         // we'll be comparing entries to see if we need to add
1056         // modifiers, like "1984a"
1057         map<docstring, BibTeXInfo>::iterator last;
1058
1059         vector<docstring>::const_iterator it = cited_entries_.begin();
1060         vector<docstring>::const_iterator const en = cited_entries_.end();
1061         for (; it != en; ++it) {
1062                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1063                 // this shouldn't happen, but...
1064                 if (biit == bimap_.end())
1065                         // ...fail gracefully, anyway.
1066                         continue;
1067                 BibTeXInfo & entry = biit->second;
1068                 if (numbers) {
1069                         docstring const num = convert<docstring>(++keynumber);
1070                         entry.setCiteNumber(num);
1071                 } else {
1072                         // coverity complains about our derefercing the iterator last,
1073                         // which was not initialized above. but it does get initialized
1074                         // after the first time through the loop, which is the point of
1075                         // the first test.
1076                         // coverity[FORWARD_NULL]
1077                         if (it != cited_entries_.begin()
1078                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1079                             // we access the year via getYear() so as to get it from the xref,
1080                             // if we need to do so
1081                             && getYear(entry.key()) == getYear(last->second.key())) {
1082                                 if (modifier == 0) {
1083                                         // so the last one should have been 'a'
1084                                         last->second.setModifier('a');
1085                                         modifier = 'b';
1086                                 } else if (modifier == 'z')
1087                                         modifier = 'A';
1088                                 else
1089                                         modifier++;
1090                         } else {
1091                                 modifier = 0;
1092                         }
1093                         entry.setModifier(modifier);
1094                         // remember the last one
1095                         last = biit;
1096                 }
1097         }
1098         // Set the labels
1099         it = cited_entries_.begin();
1100         for (; it != en; ++it) {
1101                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1102                 // this shouldn't happen, but...
1103                 if (biit == bimap_.end())
1104                         // ...fail gracefully, anyway.
1105                         continue;
1106                 BibTeXInfo & entry = biit->second;
1107                 if (numbers) {
1108                         entry.label(entry.citeNumber());
1109                 } else {
1110                         docstring const auth = entry.getAbbreviatedAuthor(&buf, false);
1111                         // we do it this way so as to access the xref, if necessary
1112                         // note that this also gives us the modifier
1113                         docstring const year = getYear(*it, buf, true);
1114                         if (!auth.empty() && !year.empty())
1115                                 entry.label(auth + ' ' + year);
1116                         else
1117                                 entry.label(entry.key());
1118                 }
1119         }
1120 }
1121
1122
1123 //////////////////////////////////////////////////////////////////////
1124 //
1125 // CitationStyle
1126 //
1127 //////////////////////////////////////////////////////////////////////
1128
1129
1130 CitationStyle citationStyleFromString(string const & command)
1131 {
1132         CitationStyle cs;
1133         if (command.empty())
1134                 return cs;
1135
1136         string cmd = command;
1137         if (cmd[0] == 'C') {
1138                 cs.forceUpperCase = true;
1139                 cmd[0] = 'c';
1140         }
1141
1142         size_t const n = cmd.size() - 1;
1143         if (cmd[n] == '*') {
1144                 cs.fullAuthorList = true;
1145                 cmd = cmd.substr(0, n);
1146         }
1147
1148         cs.cmd = cmd;
1149         return cs;
1150 }
1151
1152
1153 string citationStyleToString(const CitationStyle & cs)
1154 {
1155         string cmd = cs.cmd;
1156         if (cs.forceUpperCase)
1157                 cmd[0] = uppercase(cmd[0]);
1158         if (cs.fullAuthorList)
1159                 cmd += '*';
1160         return cmd;
1161 }
1162
1163 } // namespace lyx