src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Encoding.h"
  21 #include "InsetIterator.h"
  22 #include "Language.h"
  23 #include "output_xhtml.h"
  24 #include "Paragraph.h"
  25 #include "TextClass.h"
  26 #include "TocBackend.h"
  27
  28 #include "support/convert.h"
  29 #include "support/debug.h"
  30 #include "support/docstream.h"
  31 #include "support/gettext.h"
  32 #include "support/lassert.h"
  33 #include "support/lstrings.h"
  34 #include "support/regex.h"
  35 #include "support/textutils.h"
  36
  37 #include <set>
  38
  39 using namespace std;
  40 using namespace lyx::support;
  41
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 // gets the "family name" from an author-type string
  48 docstring familyName(docstring const & name)
  49 {
  50         if (name.empty())
  51                 return docstring();
  52
  53         // first we look for a comma, and take the last name to be everything
  54         // preceding the right-most one, so that we also get the "jr" part.
  55         docstring::size_type idx = name.rfind(',');
  56         if (idx != docstring::npos)
  57                 return ltrim(name.substr(0, idx));
  58
  59         // OK, so now we want to look for the last name. We're going to
  60         // include the "von" part. This isn't perfect.
  61         // Split on spaces, to get various tokens.
  62         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  63         // If we only get two, assume the last one is the last name
  64         if (pieces.size() <= 2)
  65                 return pieces.back();
  66
  67         // Now we look for the first token that begins with a lower case letter.
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator en = pieces.end();
  70         for (; it != en; ++it) {
  71                 if ((*it).empty())
  72                         continue;
  73                 char_type const c = (*it)[0];
  74                 if (isLower(c))
  75                         break;
  76         }
  77
  78         if (it == en) // we never found a "von"
  79                 return pieces.back();
  80
  81         // reconstruct what we need to return
  82         docstring retval;
  83         bool first = true;
  84         for (; it != en; ++it) {
  85                 if (!first)
  86                         retval += " ";
  87                 else
  88                         first = false;
  89                 retval += *it;
  90         }
  91         return retval;
  92 }
  93
  94
  95 // converts a string containing LaTeX commands into unicode
  96 // for display.
  97 docstring convertLaTeXCommands(docstring const & str)
  98 {
  99         docstring val = str;
 100         docstring ret;
 101
 102         bool scanning_cmd = false;
 103         bool scanning_math = false;
 104         bool escaped = false; // used to catch \$, etc.
 105         while (!val.empty()) {
 106                 char_type const ch = val[0];
 107
 108                 // if we're scanning math, we output everything until we
 109                 // find an unescaped $, at which point we break out.
 110                 if (scanning_math) {
 111                         if (escaped)
 112                                 escaped = false;
 113                         else if (ch == '\\')
 114                                 escaped = true;
 115                         else if (ch == '$')
 116                                 scanning_math = false;
 117                         ret += ch;
 118                         val = val.substr(1);
 119                         continue;
 120                 }
 121
 122                 // if we're scanning a command name, then we just
 123                 // discard characters until we hit something that
 124                 // isn't alpha.
 125                 if (scanning_cmd) {
 126                         if (isAlphaASCII(ch)) {
 127                                 val = val.substr(1);
 128                                 escaped = false;
 129                                 continue;
 130                         }
 131                         // so we're done with this command.
 132                         // now we fall through and check this character.
 133                         scanning_cmd = false;
 134                 }
 135
 136                 // was the last character a \? If so, then this is something like:
 137                 // \\ or \$, so we'll just output it. That's probably not always right...
 138                 if (escaped) {
 139                         // exception: output \, as THIN SPACE
 140                         if (ch == ',')
 141                                 ret.push_back(0x2009);
 142                         else
 143                                 ret += ch;
 144                         val = val.substr(1);
 145                         escaped = false;
 146                         continue;
 147                 }
 148
 149                 if (ch == '$') {
 150                         ret += ch;
 151                         val = val.substr(1);
 152                         scanning_math = true;
 153                         continue;
 154                 }
 155
 156                 // we just ignore braces
 157                 if (ch == '{' || ch == '}') {
 158                         val = val.substr(1);
 159                         continue;
 160                 }
 161
 162                 // we're going to check things that look like commands, so if
 163                 // this doesn't, just output it.
 164                 if (ch != '\\') {
 165                         ret += ch;
 166                         val = val.substr(1);
 167                         continue;
 168                 }
 169
 170                 // ok, could be a command of some sort
 171                 // let's see if it corresponds to some unicode
 172                 // unicodesymbols has things in the form: \"{u},
 173                 // whereas we may see things like: \"u. So we'll
 174                 // look for that and change it, if necessary.
 175                 // FIXME: This is a sort of mini-tex2lyx.
 176                 //        Use the real tex2lyx instead!
 177                 static lyx::regex const reg("^\\\\\\W\\w");
 178                 if (lyx::regex_search(to_utf8(val), reg)) {
 179                         val.insert(3, from_ascii("}"));
 180                         val.insert(2, from_ascii("{"));
 181                 }
 182                 bool termination;
 183                 docstring rem;
 184                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 185                                 Encodings::TEXT_CMD, termination, rem);
 186                 if (!cnvtd.empty()) {
 187                         // it did, so we'll take that bit and proceed with what's left
 188                         ret += cnvtd;
 189                         val = rem;
 190                         continue;
 191                 }
 192                 // it's a command of some sort
 193                 scanning_cmd = true;
 194                 escaped = true;
 195                 val = val.substr(1);
 196         }
 197         return ret;
 198 }
 199
 200
 201 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 202 docstring processRichtext(docstring const & str, bool richtext)
 203 {
 204         docstring val = str;
 205         docstring ret;
 206
 207         bool scanning_rich = false;
 208         while (!val.empty()) {
 209                 char_type const ch = val[0];
 210                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 211                         // beginning of rich text
 212                         scanning_rich = true;
 213                         val = val.substr(2);
 214                         continue;
 215                 }
 216                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 217                         // end of rich text
 218                         scanning_rich = false;
 219                         val = val.substr(2);
 220                         continue;
 221                 }
 222                 if (richtext) {
 223                         if (scanning_rich)
 224                                 ret += ch;
 225                         else {
 226                                 // we need to escape '<' and '>'
 227                                 if (ch == '<')
 228                                         ret += "&lt;";
 229                                 else if (ch == '>')
 230                                         ret += "&gt;";
 231                                 else
 232                                         ret += ch;
 233                         }
 234                 } else if (!scanning_rich /* && !richtext */)
 235                         ret += ch;
 236                 // else the character is discarded, which will happen only if
 237                 // richtext == false and we are scanning rich text
 238                 val = val.substr(1);
 239         }
 240         return ret;
 241 }
 242
 243 } // anon namespace
 244
 245
 246 //////////////////////////////////////////////////////////////////////
 247 //
 248 // BibTeXInfo
 249 //
 250 //////////////////////////////////////////////////////////////////////
 251
 252 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 253         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 254           modifier_(0)
 255 {}
 256
 257
 258 docstring const BibTeXInfo::getAbbreviatedAuthor(bool jurabib_style) const
 259 {
 260         if (!is_bibtex_) {
 261                 docstring const opt = label();
 262                 if (opt.empty())
 263                         return docstring();
 264
 265                 docstring authors;
 266                 docstring const remainder = trim(split(opt, authors, '('));
 267                 if (remainder.empty())
 268                         // in this case, we didn't find a "(",
 269                         // so we don't have author (year)
 270                         return docstring();
 271                 return authors;
 272         }
 273
 274         docstring author = operator[]("author");
 275         if (author.empty()) {
 276                 author = operator[]("editor");
 277                 if (author.empty())
 278                         return author;
 279         }
 280
 281         // FIXME Move this to a separate routine that can
 282         // be called from elsewhere.
 283         //
 284         // OK, we've got some names. Let's format them.
 285         // Try to split the author list on " and "
 286         vector<docstring> const authors =
 287                 getVectorFromString(author, from_ascii(" and "));
 288
 289         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 290                 docstring shortauthor = familyName(authors[0])
 291                         + "/" + familyName(authors[1]);
 292                 if (authors.size() == 3)
 293                         shortauthor += "/" + familyName(authors[2]);
 294                 return convertLaTeXCommands(shortauthor);
 295         }
 296
 297         docstring retval = familyName(authors[0]);
 298
 299         if (authors.size() == 2 && authors[1] != "others")
 300                 retval = bformat(from_ascii("%1$s and %2$s"),
 301                         familyName(authors[0]), familyName(authors[1]));
 302         else if (authors.size() >= 2)
 303                 retval = bformat(from_ascii("%1$s et al."),
 304                         familyName(authors[0]));
 305
 306         return convertLaTeXCommands(retval);
 307 }
 308
 309
 310 docstring const BibTeXInfo::getAbbreviatedAuthor(Buffer const & buf, bool jurabib_style) const
 311 {
 312         docstring const author = getAbbreviatedAuthor(jurabib_style);
 313         if (!is_bibtex_)
 314                 return author;
 315         vector<docstring> const authors = getVectorFromString(author, from_ascii(" and "));
 316         if (authors.size() == 2)
 317                 return bformat(buf.B_("%1$s and %2$s"), authors[0], authors[1]);
 318         docstring::size_type const idx = author.rfind(from_ascii(" et al."));
 319         if (idx != docstring::npos)
 320                 return bformat(buf.B_("%1$s et al."), author.substr(0, idx));
 321         return author;
 322 }
 323
 324
 325 docstring const BibTeXInfo::getYear() const
 326 {
 327         if (is_bibtex_)
 328                 return operator[]("year");
 329
 330         docstring const opt = label();
 331         if (opt.empty())
 332                 return docstring();
 333
 334         docstring authors;
 335         docstring tmp = split(opt, authors, '(');
 336         if (tmp.empty())
 337                 // we don't have author (year)
 338                 return docstring();
 339         docstring year;
 340         tmp = split(tmp, year, ')');
 341         return year;
 342 }
 343
 344
 345 docstring const BibTeXInfo::getXRef() const
 346 {
 347         if (!is_bibtex_)
 348                 return docstring();
 349         return operator[]("crossref");
 350 }
 351
 352
 353 namespace {
 354
 355 docstring parseOptions(docstring const & format, string & optkey,
 356                     docstring & ifpart, docstring & elsepart);
 357
 358 // Calls parseOptions to deal with an embedded option, such as:
 359 //   {%number%[[, no.~%number%]]}
 360 // which must appear at the start of format. ifelsepart gets the
 361 // whole of the option, and we return what's left after the option.
 362 // we return format if there is an error.
 363 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 364 {
 365         LASSERT(format[0] == '{' && format[1] == '%', return format);
 366         string optkey;
 367         docstring ifpart;
 368         docstring elsepart;
 369         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 370         if (format == rest) { // parse error
 371                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 372                 return format;
 373         }
 374         LASSERT(rest.size() <= format.size(),
 375                 { ifelsepart = docstring(); return format; });
 376         ifelsepart = format.substr(0, format.size() - rest.size());
 377         return rest;
 378 }
 379
 380
 381 // Gets a "clause" from a format string, where the clause is
 382 // delimited by '[[' and ']]'. Returns what is left after the
 383 // clause is removed, and returns format if there is an error.
 384 docstring getClause(docstring const & format, docstring & clause)
 385 {
 386         docstring fmt = format;
 387         // remove '[['
 388         fmt = fmt.substr(2);
 389         // we'll remove characters from the front of fmt as we
 390         // deal with them
 391         while (!fmt.empty()) {
 392                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 393                         // that's the end
 394                         fmt = fmt.substr(2);
 395                         break;
 396                 }
 397                 // check for an embedded option
 398                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 399                         docstring part;
 400                         docstring const rest = parseEmbeddedOption(fmt, part);
 401                         if (fmt == rest) {
 402                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 403                                 return format;
 404                         }
 405                         clause += part;
 406                         fmt = rest;
 407                 } else { // it's just a normal character
 408                                 clause += fmt[0];
 409                                 fmt = fmt.substr(1);
 410                 }
 411         }
 412         return fmt;
 413 }
 414
 415
 416 // parse an options string, which must appear at the start of the
 417 // format parameter. puts the parsed bits in optkey, ifpart, and
 418 // elsepart and returns what's left after the option is removed.
 419 // if there's an error, it returns format itself.
 420 docstring parseOptions(docstring const & format, string & optkey,
 421                     docstring & ifpart, docstring & elsepart)
 422 {
 423         LASSERT(format[0] == '{' && format[1] == '%', return format);
 424         // strip '{%'
 425         docstring fmt = format.substr(2);
 426         size_t pos = fmt.find('%'); // end of key
 427         if (pos == string::npos) {
 428                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 429                 return format;
 430         }
 431         optkey = to_utf8(fmt.substr(0, pos));
 432         fmt = fmt.substr(pos + 1);
 433         // [[format]] should be next
 434         if (fmt[0] != '[' || fmt[1] != '[') {
 435                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 436                 return format;
 437         }
 438
 439         docstring curfmt = fmt;
 440         fmt = getClause(curfmt, ifpart);
 441         if (fmt == curfmt) {
 442                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 443                 return format;
 444         }
 445
 446         if (fmt[0] == '}') // we're done, no else clause
 447                 return fmt.substr(1);
 448
 449         // else part should follow
 450         if (fmt[0] != '[' || fmt[1] != '[') {
 451                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 452                 return format;
 453         }
 454
 455         curfmt = fmt;
 456         fmt = getClause(curfmt, elsepart);
 457         // we should be done
 458         if (fmt == curfmt || fmt[0] != '}') {
 459                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 460                 return format;
 461         }
 462         return fmt.substr(1);
 463 }
 464
 465
 466 } // anon namespace
 467
 468 /* FIXME
 469 Bug #9131 revealed an oddity in how we are generating citation information
 470 when more than one key is given. We end up building a longer and longer format
 471 string as we go, which we then have to re-parse, over and over and over again,
 472 rather than generating the information for the individual keys and then putting
 473 all of that together. We do that to deal with the way separators work, from what
 474 I can tell, but it still feels like a hack. Fixing this would require quite a
 475 bit of work, however.
 476 */
 477 docstring BibTeXInfo::expandFormat(docstring const & format,
 478                 BibTeXInfo const * const xref, int & counter, Buffer const & buf,
 479                 docstring before, docstring after, docstring dialog, bool next) const
 480 {
 481         // incorrect use of macros could put us in an infinite loop
 482         static int const max_passes = 5000;
 483         // the use of overly large keys can lead to performance problems, due
 484         // to eventual attempts to convert LaTeX macros to unicode. See bug
 485         // #8944. This is perhaps not the best solution, but it will have to
 486         // do for now.
 487         static size_t const max_keysize = 128;
 488         odocstringstream ret; // return value
 489         string key;
 490         bool scanning_key = false;
 491         bool scanning_rich = false;
 492
 493         CiteEngineType const engine_type = buf.params().citeEngineType();
 494         docstring fmt = format;
 495         // we'll remove characters from the front of fmt as we
 496         // deal with them
 497         while (!fmt.empty()) {
 498                 if (counter > max_passes) {
 499                         LYXERR0("Recursion limit reached while parsing `"
 500                                 << format << "'.");
 501                         return _("ERROR!");
 502                 }
 503
 504                 char_type thischar = fmt[0];
 505                 if (thischar == '%') {
 506                         // beginning or end of key
 507                         if (scanning_key) {
 508                                 // end of key
 509                                 scanning_key = false;
 510                                 // so we replace the key with its value, which may be empty
 511                                 if (key[0] == '!') {
 512                                         // macro
 513                                         string const val =
 514                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 515                                         fmt = from_utf8(val) + fmt.substr(1);
 516                                         counter += 1;
 517                                         continue;
 518                                 } else if (key[0] == '_') {
 519                                         // a translatable bit
 520                                         string const val =
 521                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 522                                         docstring const trans =
 523                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 524                                         ret << trans;
 525                                 } else {
 526                                         docstring const val =
 527                                                 getValueForKey(key, buf, before, after, dialog, xref, max_keysize);
 528                                         if (!scanning_rich)
 529                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 530                                         ret << val;
 531                                         if (!scanning_rich)
 532                                                 ret << from_ascii("{!</span>!}");
 533                                 }
 534                         } else {
 535                                 // beginning of key
 536                                 key.clear();
 537                                 scanning_key = true;
 538                         }
 539                 }
 540                 else if (thischar == '{') {
 541                         // beginning of option?
 542                         if (scanning_key) {
 543                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 544                                 return _("ERROR!");
 545                         }
 546                         if (fmt.size() > 1) {
 547                                 if (fmt[1] == '%') {
 548                                         // it is the beginning of an optional format
 549                                         string optkey;
 550                                         docstring ifpart;
 551                                         docstring elsepart;
 552                                         docstring const newfmt =
 553                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 554                                         if (newfmt == fmt) // parse error
 555                                                 return _("ERROR!");
 556                                         fmt = newfmt;
 557                                         docstring const val =
 558                                                 getValueForKey(optkey, buf, before, after, dialog, xref);
 559                                         if (optkey == "next" && next)
 560                                                 ret << ifpart; // without expansion
 561                                         else if (!val.empty()) {
 562                                                 int newcounter = 0;
 563                                                 ret << expandFormat(ifpart, xref, newcounter, buf,
 564                                                         before, after, dialog, next);
 565                                         } else if (!elsepart.empty()) {
 566                                                 int newcounter = 0;
 567                                                 ret << expandFormat(elsepart, xref, newcounter, buf,
 568                                                         before, after, dialog, next);
 569                                         }
 570                                         // fmt will have been shortened for us already
 571                                         continue;
 572                                 }
 573                                 if (fmt[1] == '!') {
 574                                         // beginning of rich text
 575                                         scanning_rich = true;
 576                                         fmt = fmt.substr(2);
 577                                         ret << from_ascii("{!");
 578                                         continue;
 579                                 }
 580                         }
 581                         // we are here if '{' was not followed by % or !.
 582                         // So it's just a character.
 583                         ret << thischar;
 584                 }
 585                 else if (scanning_rich && thischar == '!'
 586                          && fmt.size() > 1 && fmt[1] == '}') {
 587                         // end of rich text
 588                         scanning_rich = false;
 589                         fmt = fmt.substr(2);
 590                         ret << from_ascii("!}");
 591                         continue;
 592                 }
 593                 else if (scanning_key)
 594                         key += char(thischar);
 595                 else {
 596                         try {
 597                                 ret.put(thischar);
 598                         } catch (EncodingException & /* e */) {
 599                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 600                         }
 601                 }
 602                 fmt = fmt.substr(1);
 603         } // for loop
 604         if (scanning_key) {
 605                 LYXERR0("Never found end of key in `" << format << "'!");
 606                 return _("ERROR!");
 607         }
 608         if (scanning_rich) {
 609                 LYXERR0("Never found end of rich text in `" << format << "'!");
 610                 return _("ERROR!");
 611         }
 612         return ret.str();
 613 }
 614
 615
 616 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref,
 617         Buffer const & buf, bool richtext) const
 618 {
 619         if (!richtext && !info_.empty())
 620                 return info_;
 621         if (richtext && !info_richtext_.empty())
 622                 return info_richtext_;
 623
 624         if (!is_bibtex_) {
 625                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 626                 info_ = it->second;
 627                 return info_;
 628         }
 629
 630         CiteEngineType const engine_type = buf.params().citeEngineType();
 631         DocumentClass const & dc = buf.params().documentClass();
 632         docstring const & format =
 633                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 634         int counter = 0;
 635         info_ = expandFormat(format, xref, counter, buf,
 636                 docstring(), docstring(), docstring(), false);
 637
 638         if (info_.empty()) {
 639                 // this probably shouldn't happen
 640                 return info_;
 641         }
 642
 643         if (richtext) {
 644                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 645                 return info_richtext_;
 646         }
 647
 648         info_ = convertLaTeXCommands(processRichtext(info_, false));
 649         return info_;
 650 }
 651
 652
 653 docstring const BibTeXInfo::getLabel(BibTeXInfo const * const xref,
 654         Buffer const & buf, docstring const & format, bool richtext,
 655         docstring const & before, docstring const & after,
 656         docstring const & dialog, bool next) const
 657 {
 658         docstring loclabel;
 659
 660         int counter = 0;
 661         loclabel = expandFormat(format, xref, counter, buf,
 662                 before, after, dialog, next);
 663
 664         if (!loclabel.empty() && !next) {
 665                 loclabel = processRichtext(loclabel, richtext);
 666                 loclabel = convertLaTeXCommands(loclabel);
 667         }
 668
 669         return loclabel;
 670 }
 671
 672
 673 docstring const & BibTeXInfo::operator[](docstring const & field) const
 674 {
 675         BibTeXInfo::const_iterator it = find(field);
 676         if (it != end())
 677                 return it->second;
 678         static docstring const empty_value = docstring();
 679         return empty_value;
 680 }
 681
 682
 683 docstring const & BibTeXInfo::operator[](string const & field) const
 684 {
 685         return operator[](from_ascii(field));
 686 }
 687
 688
 689 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 690         docstring const & before, docstring const & after, docstring const & dialog,
 691         BibTeXInfo const * const xref, size_t maxsize) const
 692 {
 693         // anything less is pointless
 694         LASSERT(maxsize >= 16, maxsize = 16);
 695         string key = oldkey;
 696         bool cleanit = false;
 697         if (prefixIs(oldkey, "clean:")) {
 698                 key = oldkey.substr(6);
 699                 cleanit = true;
 700         }
 701
 702         docstring ret = operator[](key);
 703         if (ret.empty() && xref)
 704                 ret = (*xref)[key];
 705         if (ret.empty()) {
 706                 // some special keys
 707                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 708                 if (key == "dialog")
 709                         ret = dialog;
 710                 else if (key == "entrytype")
 711                         ret = entry_type_;
 712                 else if (key == "key")
 713                         ret = bib_key_;
 714                 else if (key == "label")
 715                         ret = label_;
 716                 else if (key == "modifier" && modifier_ != 0)
 717                         ret = modifier_;
 718                 else if (key == "numericallabel")
 719                         ret = cite_number_;
 720                 else if (key == "abbrvauthor")
 721                         // Special key to provide abbreviated author names.
 722                         ret = getAbbreviatedAuthor(buf, false);
 723                 else if (key == "shortauthor")
 724                         // When shortauthor is not defined, jurabib automatically
 725                         // provides jurabib-style abbreviated author names. We do
 726                         // this as well.
 727                         ret = getAbbreviatedAuthor(buf, true);
 728                 else if (key == "shorttitle") {
 729                         // When shorttitle is not defined, jurabib uses for `article'
 730                         // and `periodical' entries the form `journal volume [year]'
 731                         // and for other types of entries it uses the `title' field.
 732                         if (entry_type_ == "article" || entry_type_ == "periodical")
 733                                 ret = operator[]("journal") + " " + operator[]("volume")
 734                                         + " [" + operator[]("year") + "]";
 735                         else
 736                                 ret = operator[]("title");
 737                 } else if (key == "bibentry") {
 738                         // Special key to provide the full bibliography entry: see getInfo()
 739                         CiteEngineType const engine_type = buf.params().citeEngineType();
 740                         DocumentClass const & dc = buf.params().documentClass();
 741                         docstring const & format =
 742                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 743                         int counter = 0;
 744                         ret = expandFormat(format, xref, counter, buf,
 745                                 docstring(), docstring(), docstring(), false);
 746                 } else if (key == "textbefore")
 747                         ret = before;
 748                 else if (key == "textafter")
 749                         ret = after;
 750                 else if (key == "year")
 751                         ret = getYear();
 752         }
 753
 754         if (cleanit)
 755                 ret = html::cleanAttr(ret);
 756
 757         // make sure it is not too big
 758         support::truncateWithEllipsis(ret, maxsize);
 759         return ret;
 760 }
 761
 762
 763 //////////////////////////////////////////////////////////////////////
 764 //
 765 // BiblioInfo
 766 //
 767 //////////////////////////////////////////////////////////////////////
 768
 769 namespace {
 770
 771 // A functor for use with sort, leading to case insensitive sorting
 772 class compareNoCase: public binary_function<docstring, docstring, bool>
 773 {
 774 public:
 775         bool operator()(docstring const & s1, docstring const & s2) const {
 776                 return compare_no_case(s1, s2) < 0;
 777         }
 778 };
 779
 780 } // namespace anon
 781
 782
 783 vector<docstring> const BiblioInfo::getKeys() const
 784 {
 785         vector<docstring> bibkeys;
 786         BiblioInfo::const_iterator it  = begin();
 787         for (; it != end(); ++it)
 788                 bibkeys.push_back(it->first);
 789         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 790         return bibkeys;
 791 }
 792
 793
 794 vector<docstring> const BiblioInfo::getFields() const
 795 {
 796         vector<docstring> bibfields;
 797         set<docstring>::const_iterator it = field_names_.begin();
 798         set<docstring>::const_iterator end = field_names_.end();
 799         for (; it != end; ++it)
 800                 bibfields.push_back(*it);
 801         sort(bibfields.begin(), bibfields.end());
 802         return bibfields;
 803 }
 804
 805
 806 vector<docstring> const BiblioInfo::getEntries() const
 807 {
 808         vector<docstring> bibentries;
 809         set<docstring>::const_iterator it = entry_types_.begin();
 810         set<docstring>::const_iterator end = entry_types_.end();
 811         for (; it != end; ++it)
 812                 bibentries.push_back(*it);
 813         sort(bibentries.begin(), bibentries.end());
 814         return bibentries;
 815 }
 816
 817
 818 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 819 {
 820         BiblioInfo::const_iterator it = find(key);
 821         if (it == end())
 822                 return docstring();
 823         BibTeXInfo const & data = it->second;
 824         return data.getAbbreviatedAuthor(buf, false);
 825 }
 826
 827
 828 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 829 {
 830         BiblioInfo::const_iterator it = find(key);
 831         if (it == end())
 832                 return docstring();
 833         BibTeXInfo const & data = it->second;
 834         return data.citeNumber();
 835 }
 836
 837
 838 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 839 {
 840         BiblioInfo::const_iterator it = find(key);
 841         if (it == end())
 842                 return docstring();
 843         BibTeXInfo const & data = it->second;
 844         docstring year = data.getYear();
 845         if (year.empty()) {
 846                 // let's try the crossref
 847                 docstring const xref = data.getXRef();
 848                 if (xref.empty())
 849                         // no luck
 850                         return docstring();
 851                 BiblioInfo::const_iterator const xrefit = find(xref);
 852                 if (xrefit == end())
 853                         // no luck again
 854                         return docstring();
 855                 BibTeXInfo const & xref_data = xrefit->second;
 856                 year = xref_data.getYear();
 857         }
 858         if (use_modifier && data.modifier() != 0)
 859                 year += data.modifier();
 860         return year;
 861 }
 862
 863
 864 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 865 {
 866         docstring const year = getYear(key, use_modifier);
 867         if (year.empty())
 868                 return buf.B_("No year");
 869         return year;
 870 }
 871
 872
 873 docstring const BiblioInfo::getInfo(docstring const & key,
 874         Buffer const & buf, bool richtext) const
 875 {
 876         BiblioInfo::const_iterator it = find(key);
 877         if (it == end())
 878                 return docstring(_("Bibliography entry not found!"));
 879         BibTeXInfo const & data = it->second;
 880         BibTeXInfo const * xrefptr = 0;
 881         docstring const xref = data.getXRef();
 882         if (!xref.empty()) {
 883                 BiblioInfo::const_iterator const xrefit = find(xref);
 884                 if (xrefit != end())
 885                         xrefptr = &(xrefit->second);
 886         }
 887         return data.getInfo(xrefptr, buf, richtext);
 888 }
 889
 890
 891 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 892         Buffer const & buf, string const & style, bool for_xhtml,
 893         size_t max_size, docstring const & before, docstring const & after,
 894         docstring const & dialog) const
 895 {
 896         // shorter makes no sense
 897         LASSERT(max_size >= 16, max_size = 16);
 898
 899         // we can't display more than 10 of these, anyway
 900         bool const too_many_keys = keys.size() > 10;
 901         if (too_many_keys)
 902                 keys.resize(10);
 903
 904         CiteEngineType const engine_type = buf.params().citeEngineType();
 905         DocumentClass const & dc = buf.params().documentClass();
 906         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 907         docstring ret = format;
 908         vector<docstring>::const_iterator key = keys.begin();
 909         vector<docstring>::const_iterator ken = keys.end();
 910         for (; key != ken; ++key) {
 911                 BiblioInfo::const_iterator it = find(*key);
 912                 BibTeXInfo empty_data;
 913                 empty_data.key(*key);
 914                 BibTeXInfo & data = empty_data;
 915                 BibTeXInfo const * xrefptr = 0;
 916                 if (it != end()) {
 917                         data = it->second;
 918                         docstring const xref = data.getXRef();
 919                         if (!xref.empty()) {
 920                                 BiblioInfo::const_iterator const xrefit = find(xref);
 921                                 if (xrefit != end())
 922                                         xrefptr = &(xrefit->second);
 923                         }
 924                 }
 925                 ret = data.getLabel(xrefptr, buf, ret, for_xhtml,
 926                         before, after, dialog, key + 1 != ken);
 927         }
 928
 929         if (too_many_keys)
 930                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
 931         support::truncateWithEllipsis(ret, max_size);
 932         return ret;
 933 }
 934
 935
 936 bool BiblioInfo::isBibtex(docstring const & key) const
 937 {
 938         docstring key1;
 939         split(key, key1, ',');
 940         BiblioInfo::const_iterator it = find(key1);
 941         if (it == end())
 942                 return false;
 943         return it->second.isBibTeX();
 944 }
 945
 946
 947 vector<docstring> const BiblioInfo::getCiteStrings(
 948         vector<docstring> const & keys, vector<CitationStyle> const & styles,
 949         Buffer const & buf, docstring const & before,
 950         docstring const & after, docstring const & dialog, size_t max_size) const
 951 {
 952         if (empty())
 953                 return vector<docstring>();
 954
 955         string style;
 956         vector<docstring> vec(styles.size());
 957         for (size_t i = 0; i != vec.size(); ++i) {
 958                 style = styles[i].cmd;
 959                 vec[i] = getLabel(keys, buf, style, false, max_size, before, after, dialog);
 960         }
 961
 962         return vec;
 963 }
 964
 965
 966 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 967 {
 968         bimap_.insert(info.begin(), info.end());
 969         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
 970         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
 971 }
 972
 973
 974 namespace {
 975
 976 // used in xhtml to sort a list of BibTeXInfo objects
 977 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
 978 {
 979         docstring const lauth = lhs->getAbbreviatedAuthor();
 980         docstring const rauth = rhs->getAbbreviatedAuthor();
 981         docstring const lyear = lhs->getYear();
 982         docstring const ryear = rhs->getYear();
 983         docstring const ltitl = lhs->operator[]("title");
 984         docstring const rtitl = rhs->operator[]("title");
 985         return  (lauth < rauth)
 986                 || (lauth == rauth && lyear < ryear)
 987                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
 988 }
 989
 990 }
 991
 992
 993 void BiblioInfo::collectCitedEntries(Buffer const & buf)
 994 {
 995         cited_entries_.clear();
 996         // We are going to collect all the citation keys used in the document,
 997         // getting them from the TOC.
 998         // FIXME We may want to collect these differently, in the first case,
 999         // so that we might have them in order of appearance.
1000         set<docstring> citekeys;
1001         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1002         Toc::const_iterator it = toc->begin();
1003         Toc::const_iterator const en = toc->end();
1004         for (; it != en; ++it) {
1005                 if (it->str().empty())
1006                         continue;
1007                 vector<docstring> const keys = getVectorFromString(it->str());
1008                 citekeys.insert(keys.begin(), keys.end());
1009         }
1010         if (citekeys.empty())
1011                 return;
1012
1013         // We have a set of the keys used in this document.
1014         // We will now convert it to a list of the BibTeXInfo objects used in
1015         // this document...
1016         vector<BibTeXInfo const *> bi;
1017         set<docstring>::const_iterator cit = citekeys.begin();
1018         set<docstring>::const_iterator const cen = citekeys.end();
1019         for (; cit != cen; ++cit) {
1020                 BiblioInfo::const_iterator const bt = find(*cit);
1021                 if (bt == end() || !bt->second.isBibTeX())
1022                         continue;
1023                 bi.push_back(&(bt->second));
1024         }
1025         // ...and sort it.
1026         sort(bi.begin(), bi.end(), lSorter);
1027
1028         // Now we can write the sorted keys
1029         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1030         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1031         for (; bit != ben; ++bit)
1032                 cited_entries_.push_back((*bit)->key());
1033 }
1034
1035
1036 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1037 {
1038         collectCitedEntries(buf);
1039         CiteEngineType const engine_type = buf.params().citeEngineType();
1040         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1041
1042         int keynumber = 0;
1043         char modifier = 0;
1044         // used to remember the last one we saw
1045         // we'll be comparing entries to see if we need to add
1046         // modifiers, like "1984a"
1047         map<docstring, BibTeXInfo>::iterator last;
1048
1049         vector<docstring>::const_iterator it = cited_entries_.begin();
1050         vector<docstring>::const_iterator const en = cited_entries_.end();
1051         for (; it != en; ++it) {
1052                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1053                 // this shouldn't happen, but...
1054                 if (biit == bimap_.end())
1055                         // ...fail gracefully, anyway.
1056                         continue;
1057                 BibTeXInfo & entry = biit->second;
1058                 if (numbers) {
1059                         docstring const num = convert<docstring>(++keynumber);
1060                         entry.setCiteNumber(num);
1061                 } else {
1062                         if (it != cited_entries_.begin()
1063                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1064                             // we access the year via getYear() so as to get it from the xref,
1065                             // if we need to do so
1066                             && getYear(entry.key()) == getYear(last->second.key())) {
1067                                 if (modifier == 0) {
1068                                         // so the last one should have been 'a'
1069                                         last->second.setModifier('a');
1070                                         modifier = 'b';
1071                                 } else if (modifier == 'z')
1072                                         modifier = 'A';
1073                                 else
1074                                         modifier++;
1075                         } else {
1076                                 modifier = 0;
1077                         }
1078                         entry.setModifier(modifier);
1079                         // remember the last one
1080                         last = biit;
1081                 }
1082         }
1083         // Set the labels
1084         it = cited_entries_.begin();
1085         for (; it != en; ++it) {
1086                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1087                 // this shouldn't happen, but...
1088                 if (biit == bimap_.end())
1089                         // ...fail gracefully, anyway.
1090                         continue;
1091                 BibTeXInfo & entry = biit->second;
1092                 if (numbers) {
1093                         entry.label(entry.citeNumber());
1094                 } else {
1095                         docstring const auth = entry.getAbbreviatedAuthor(buf, false);
1096                         // we do it this way so as to access the xref, if necessary
1097                         // note that this also gives us the modifier
1098                         docstring const year = getYear(*it, buf, true);
1099                         if (!auth.empty() && !year.empty())
1100                                 entry.label(auth + ' ' + year);
1101                         else
1102                                 entry.label(entry.key());
1103                 }
1104         }
1105 }
1106
1107
1108 //////////////////////////////////////////////////////////////////////
1109 //
1110 // CitationStyle
1111 //
1112 //////////////////////////////////////////////////////////////////////
1113
1114
1115 CitationStyle citationStyleFromString(string const & command)
1116 {
1117         CitationStyle cs;
1118         if (command.empty())
1119                 return cs;
1120
1121         string cmd = command;
1122         if (cmd[0] == 'C') {
1123                 cs.forceUpperCase = true;
1124                 cmd[0] = 'c';
1125         }
1126
1127         size_t const n = cmd.size() - 1;
1128         if (cmd[n] == '*') {
1129                 cs.fullAuthorList = true;
1130                 cmd = cmd.substr(0, n);
1131         }
1132
1133         cs.cmd = cmd;
1134         return cs;
1135 }
1136
1137
1138 string citationStyleToString(const CitationStyle & cs)
1139 {
1140         string cmd = cs.cmd;
1141         if (cs.forceUpperCase)
1142                 cmd[0] = 'C';
1143         if (cs.fullAuthorList)
1144                 cmd += '*';
1145         return cmd;
1146 }
1147
1148 } // namespace lyx