src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Encoding.h"
  21 #include "InsetIterator.h"
  22 #include "Language.h"
  23 #include "output_xhtml.h"
  24 #include "Paragraph.h"
  25 #include "TextClass.h"
  26 #include "TocBackend.h"
  27
  28 #include "support/convert.h"
  29 #include "support/debug.h"
  30 #include "support/docstream.h"
  31 #include "support/gettext.h"
  32 #include "support/lassert.h"
  33 #include "support/lstrings.h"
  34 #include "support/regex.h"
  35 #include "support/textutils.h"
  36
  37 #include <set>
  38
  39 using namespace std;
  40 using namespace lyx::support;
  41
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 // gets the "family name" from an author-type string
  48 docstring familyName(docstring const & name)
  49 {
  50         if (name.empty())
  51                 return docstring();
  52
  53         // first we look for a comma, and take the last name to be everything
  54         // preceding the right-most one, so that we also get the "jr" part.
  55         docstring::size_type idx = name.rfind(',');
  56         if (idx != docstring::npos)
  57                 return ltrim(name.substr(0, idx));
  58
  59         // OK, so now we want to look for the last name. We're going to
  60         // include the "von" part. This isn't perfect.
  61         // Split on spaces, to get various tokens.
  62         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  63         // If we only get two, assume the last one is the last name
  64         if (pieces.size() <= 2)
  65                 return pieces.back();
  66
  67         // Now we look for the first token that begins with a lower case letter.
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator en = pieces.end();
  70         for (; it != en; ++it) {
  71                 if ((*it).empty())
  72                         continue;
  73                 char_type const c = (*it)[0];
  74                 if (isLower(c))
  75                         break;
  76         }
  77
  78         if (it == en) // we never found a "von"
  79                 return pieces.back();
  80
  81         // reconstruct what we need to return
  82         docstring retval;
  83         bool first = true;
  84         for (; it != en; ++it) {
  85                 if (!first)
  86                         retval += " ";
  87                 else
  88                         first = false;
  89                 retval += *it;
  90         }
  91         return retval;
  92 }
  93
  94
  95 // converts a string containing LaTeX commands into unicode
  96 // for display.
  97 docstring convertLaTeXCommands(docstring const & str)
  98 {
  99         docstring val = str;
 100         docstring ret;
 101
 102         bool scanning_cmd = false;
 103         bool scanning_math = false;
 104         bool escaped = false; // used to catch \$, etc.
 105         while (!val.empty()) {
 106                 char_type const ch = val[0];
 107
 108                 // if we're scanning math, we output everything until we
 109                 // find an unescaped $, at which point we break out.
 110                 if (scanning_math) {
 111                         if (escaped)
 112                                 escaped = false;
 113                         else if (ch == '\\')
 114                                 escaped = true;
 115                         else if (ch == '$')
 116                                 scanning_math = false;
 117                         ret += ch;
 118                         val = val.substr(1);
 119                         continue;
 120                 }
 121
 122                 // if we're scanning a command name, then we just
 123                 // discard characters until we hit something that
 124                 // isn't alpha.
 125                 if (scanning_cmd) {
 126                         if (isAlphaASCII(ch)) {
 127                                 val = val.substr(1);
 128                                 escaped = false;
 129                                 continue;
 130                         }
 131                         // so we're done with this command.
 132                         // now we fall through and check this character.
 133                         scanning_cmd = false;
 134                 }
 135
 136                 // was the last character a \? If so, then this is something like:
 137                 // \\ or \$, so we'll just output it. That's probably not always right...
 138                 if (escaped) {
 139                         // exception: output \, as THIN SPACE
 140                         if (ch == ',')
 141                                 ret.push_back(0x2009);
 142                         else
 143                                 ret += ch;
 144                         val = val.substr(1);
 145                         escaped = false;
 146                         continue;
 147                 }
 148
 149                 if (ch == '$') {
 150                         ret += ch;
 151                         val = val.substr(1);
 152                         scanning_math = true;
 153                         continue;
 154                 }
 155
 156                 // we just ignore braces
 157                 if (ch == '{' || ch == '}') {
 158                         val = val.substr(1);
 159                         continue;
 160                 }
 161
 162                 // we're going to check things that look like commands, so if
 163                 // this doesn't, just output it.
 164                 if (ch != '\\') {
 165                         ret += ch;
 166                         val = val.substr(1);
 167                         continue;
 168                 }
 169
 170                 // ok, could be a command of some sort
 171                 // let's see if it corresponds to some unicode
 172                 // unicodesymbols has things in the form: \"{u},
 173                 // whereas we may see things like: \"u. So we'll
 174                 // look for that and change it, if necessary.
 175                 // FIXME: This is a sort of mini-tex2lyx.
 176                 //        Use the real tex2lyx instead!
 177                 static lyx::regex const reg("^\\\\\\W\\w");
 178                 if (lyx::regex_search(to_utf8(val), reg)) {
 179                         val.insert(3, from_ascii("}"));
 180                         val.insert(2, from_ascii("{"));
 181                 }
 182                 bool termination;
 183                 docstring rem;
 184                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 185                                 Encodings::TEXT_CMD, termination, rem);
 186                 if (!cnvtd.empty()) {
 187                         // it did, so we'll take that bit and proceed with what's left
 188                         ret += cnvtd;
 189                         val = rem;
 190                         continue;
 191                 }
 192                 // it's a command of some sort
 193                 scanning_cmd = true;
 194                 escaped = true;
 195                 val = val.substr(1);
 196         }
 197         return ret;
 198 }
 199
 200
 201 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 202 docstring processRichtext(docstring const & str, bool richtext)
 203 {
 204         docstring val = str;
 205         docstring ret;
 206
 207         bool scanning_rich = false;
 208         while (!val.empty()) {
 209                 char_type const ch = val[0];
 210                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 211                         // beginning of rich text
 212                         scanning_rich = true;
 213                         val = val.substr(2);
 214                         continue;
 215                 }
 216                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 217                         // end of rich text
 218                         scanning_rich = false;
 219                         val = val.substr(2);
 220                         continue;
 221                 }
 222                 if (richtext) {
 223                         if (scanning_rich)
 224                                 ret += ch;
 225                         else {
 226                                 // we need to escape '<' and '>'
 227                                 if (ch == '<')
 228                                         ret += "&lt;";
 229                                 else if (ch == '>')
 230                                         ret += "&gt;";
 231                                 else
 232                                         ret += ch;
 233                         }
 234                 } else if (!scanning_rich /* && !richtext */)
 235                         ret += ch;
 236                 // else the character is discarded, which will happen only if
 237                 // richtext == false and we are scanning rich text
 238                 val = val.substr(1);
 239         }
 240         return ret;
 241 }
 242
 243 } // anon namespace
 244
 245
 246 //////////////////////////////////////////////////////////////////////
 247 //
 248 // BibTeXInfo
 249 //
 250 //////////////////////////////////////////////////////////////////////
 251
 252 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 253         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 254           modifier_(0)
 255 {}
 256
 257
 258 docstring const BibTeXInfo::getAbbreviatedAuthor(
 259     Buffer const * buf, bool jurabib_style) const
 260 {
 261         if (!is_bibtex_) {
 262                 docstring const opt = label();
 263                 if (opt.empty())
 264                         return docstring();
 265
 266                 docstring authors;
 267                 docstring const remainder = trim(split(opt, authors, '('));
 268                 if (remainder.empty())
 269                         // in this case, we didn't find a "(",
 270                         // so we don't have author (year)
 271                         return docstring();
 272                 return authors;
 273         }
 274
 275         docstring author = operator[]("author");
 276         if (author.empty()) {
 277                 author = operator[]("editor");
 278                 if (author.empty())
 279                         return author;
 280         }
 281
 282         // FIXME Move this to a separate routine that can
 283         // be called from elsewhere.
 284         //
 285         // OK, we've got some names. Let's format them.
 286         // Try to split the author list on " and "
 287         vector<docstring> const authors =
 288                 getVectorFromString(author, from_ascii(" and "));
 289
 290         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 291                 docstring shortauthor = familyName(authors[0])
 292                         + "/" + familyName(authors[1]);
 293                 if (authors.size() == 3)
 294                         shortauthor += "/" + familyName(authors[2]);
 295                 return convertLaTeXCommands(shortauthor);
 296         }
 297
 298         docstring retval = familyName(authors[0]);
 299
 300         if (authors.size() == 2 && authors[1] != "others") {
 301                 docstring const dformat = buf ?
 302                         buf->B_("%1$s and %2$s") : from_ascii("%1$s and %2$s");
 303                 retval = bformat(dformat, familyName(authors[0]), familyName(authors[1]));
 304         } else if (authors.size() >= 2) {
 305                 // we get here either if the author list is longer than two names
 306                 // or if the second 'name' is "others". we do the same thing either
 307                 // way.
 308                 docstring const dformat = buf ?
 309                         buf->B_("%1$s et al.") : from_ascii("%1$s et al.");
 310                 retval = bformat(dformat, familyName(authors[0]));
 311         }
 312
 313         return convertLaTeXCommands(retval);
 314 }
 315
 316
 317 docstring const BibTeXInfo::getYear() const
 318 {
 319         if (is_bibtex_)
 320                 return operator[]("year");
 321
 322         docstring const opt = label();
 323         if (opt.empty())
 324                 return docstring();
 325
 326         docstring authors;
 327         docstring tmp = split(opt, authors, '(');
 328         if (tmp.empty())
 329                 // we don't have author (year)
 330                 return docstring();
 331         docstring year;
 332         tmp = split(tmp, year, ')');
 333         return year;
 334 }
 335
 336
 337 docstring const BibTeXInfo::getXRef() const
 338 {
 339         if (!is_bibtex_)
 340                 return docstring();
 341         return operator[]("crossref");
 342 }
 343
 344
 345 namespace {
 346
 347 docstring parseOptions(docstring const & format, string & optkey,
 348                     docstring & ifpart, docstring & elsepart);
 349
 350 // Calls parseOptions to deal with an embedded option, such as:
 351 //   {%number%[[, no.~%number%]]}
 352 // which must appear at the start of format. ifelsepart gets the
 353 // whole of the option, and we return what's left after the option.
 354 // we return format if there is an error.
 355 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 356 {
 357         LASSERT(format[0] == '{' && format[1] == '%', return format);
 358         string optkey;
 359         docstring ifpart;
 360         docstring elsepart;
 361         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 362         if (format == rest) { // parse error
 363                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 364                 return format;
 365         }
 366         LASSERT(rest.size() <= format.size(),
 367                 { ifelsepart = docstring(); return format; });
 368         ifelsepart = format.substr(0, format.size() - rest.size());
 369         return rest;
 370 }
 371
 372
 373 // Gets a "clause" from a format string, where the clause is
 374 // delimited by '[[' and ']]'. Returns what is left after the
 375 // clause is removed, and returns format if there is an error.
 376 docstring getClause(docstring const & format, docstring & clause)
 377 {
 378         docstring fmt = format;
 379         // remove '[['
 380         fmt = fmt.substr(2);
 381         // we'll remove characters from the front of fmt as we
 382         // deal with them
 383         while (!fmt.empty()) {
 384                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 385                         // that's the end
 386                         fmt = fmt.substr(2);
 387                         break;
 388                 }
 389                 // check for an embedded option
 390                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 391                         docstring part;
 392                         docstring const rest = parseEmbeddedOption(fmt, part);
 393                         if (fmt == rest) {
 394                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 395                                 return format;
 396                         }
 397                         clause += part;
 398                         fmt = rest;
 399                 } else { // it's just a normal character
 400                                 clause += fmt[0];
 401                                 fmt = fmt.substr(1);
 402                 }
 403         }
 404         return fmt;
 405 }
 406
 407
 408 // parse an options string, which must appear at the start of the
 409 // format parameter. puts the parsed bits in optkey, ifpart, and
 410 // elsepart and returns what's left after the option is removed.
 411 // if there's an error, it returns format itself.
 412 docstring parseOptions(docstring const & format, string & optkey,
 413                     docstring & ifpart, docstring & elsepart)
 414 {
 415         LASSERT(format[0] == '{' && format[1] == '%', return format);
 416         // strip '{%'
 417         docstring fmt = format.substr(2);
 418         size_t pos = fmt.find('%'); // end of key
 419         if (pos == string::npos) {
 420                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 421                 return format;
 422         }
 423         optkey = to_utf8(fmt.substr(0, pos));
 424         fmt = fmt.substr(pos + 1);
 425         // [[format]] should be next
 426         if (fmt[0] != '[' || fmt[1] != '[') {
 427                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 428                 return format;
 429         }
 430
 431         docstring curfmt = fmt;
 432         fmt = getClause(curfmt, ifpart);
 433         if (fmt == curfmt) {
 434                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 435                 return format;
 436         }
 437
 438         if (fmt[0] == '}') // we're done, no else clause
 439                 return fmt.substr(1);
 440
 441         // else part should follow
 442         if (fmt[0] != '[' || fmt[1] != '[') {
 443                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 444                 return format;
 445         }
 446
 447         curfmt = fmt;
 448         fmt = getClause(curfmt, elsepart);
 449         // we should be done
 450         if (fmt == curfmt || fmt[0] != '}') {
 451                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 452                 return format;
 453         }
 454         return fmt.substr(1);
 455 }
 456
 457
 458 } // anon namespace
 459
 460 /* FIXME
 461 Bug #9131 revealed an oddity in how we are generating citation information
 462 when more than one key is given. We end up building a longer and longer format
 463 string as we go, which we then have to re-parse, over and over and over again,
 464 rather than generating the information for the individual keys and then putting
 465 all of that together. We do that to deal with the way separators work, from what
 466 I can tell, but it still feels like a hack. Fixing this would require quite a
 467 bit of work, however.
 468 */
 469 docstring BibTeXInfo::expandFormat(docstring const & format,
 470                 BibTeXInfo const * const xref, int & counter, Buffer const & buf,
 471                 docstring before, docstring after, docstring dialog, bool next) const
 472 {
 473         // incorrect use of macros could put us in an infinite loop
 474         static int const max_passes = 5000;
 475         // the use of overly large keys can lead to performance problems, due
 476         // to eventual attempts to convert LaTeX macros to unicode. See bug
 477         // #8944. This is perhaps not the best solution, but it will have to
 478         // do for now.
 479         static size_t const max_keysize = 128;
 480         odocstringstream ret; // return value
 481         string key;
 482         bool scanning_key = false;
 483         bool scanning_rich = false;
 484
 485         CiteEngineType const engine_type = buf.params().citeEngineType();
 486         docstring fmt = format;
 487         // we'll remove characters from the front of fmt as we
 488         // deal with them
 489         while (!fmt.empty()) {
 490                 if (counter > max_passes) {
 491                         LYXERR0("Recursion limit reached while parsing `"
 492                                 << format << "'.");
 493                         return _("ERROR!");
 494                 }
 495
 496                 char_type thischar = fmt[0];
 497                 if (thischar == '%') {
 498                         // beginning or end of key
 499                         if (scanning_key) {
 500                                 // end of key
 501                                 scanning_key = false;
 502                                 // so we replace the key with its value, which may be empty
 503                                 if (key[0] == '!') {
 504                                         // macro
 505                                         string const val =
 506                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 507                                         fmt = from_utf8(val) + fmt.substr(1);
 508                                         counter += 1;
 509                                         continue;
 510                                 } else if (key[0] == '_') {
 511                                         // a translatable bit
 512                                         string const val =
 513                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 514                                         docstring const trans =
 515                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 516                                         ret << trans;
 517                                 } else {
 518                                         docstring const val =
 519                                                 getValueForKey(key, buf, before, after, dialog, xref, max_keysize);
 520                                         if (!scanning_rich)
 521                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 522                                         ret << val;
 523                                         if (!scanning_rich)
 524                                                 ret << from_ascii("{!</span>!}");
 525                                 }
 526                         } else {
 527                                 // beginning of key
 528                                 key.clear();
 529                                 scanning_key = true;
 530                         }
 531                 }
 532                 else if (thischar == '{') {
 533                         // beginning of option?
 534                         if (scanning_key) {
 535                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 536                                 return _("ERROR!");
 537                         }
 538                         if (fmt.size() > 1) {
 539                                 if (fmt[1] == '%') {
 540                                         // it is the beginning of an optional format
 541                                         string optkey;
 542                                         docstring ifpart;
 543                                         docstring elsepart;
 544                                         docstring const newfmt =
 545                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 546                                         if (newfmt == fmt) // parse error
 547                                                 return _("ERROR!");
 548                                         fmt = newfmt;
 549                                         docstring const val =
 550                                                 getValueForKey(optkey, buf, before, after, dialog, xref);
 551                                         if (optkey == "next" && next)
 552                                                 ret << ifpart; // without expansion
 553                                         else if (!val.empty()) {
 554                                                 int newcounter = 0;
 555                                                 ret << expandFormat(ifpart, xref, newcounter, buf,
 556                                                         before, after, dialog, next);
 557                                         } else if (!elsepart.empty()) {
 558                                                 int newcounter = 0;
 559                                                 ret << expandFormat(elsepart, xref, newcounter, buf,
 560                                                         before, after, dialog, next);
 561                                         }
 562                                         // fmt will have been shortened for us already
 563                                         continue;
 564                                 }
 565                                 if (fmt[1] == '!') {
 566                                         // beginning of rich text
 567                                         scanning_rich = true;
 568                                         fmt = fmt.substr(2);
 569                                         ret << from_ascii("{!");
 570                                         continue;
 571                                 }
 572                         }
 573                         // we are here if '{' was not followed by % or !.
 574                         // So it's just a character.
 575                         ret << thischar;
 576                 }
 577                 else if (scanning_rich && thischar == '!'
 578                          && fmt.size() > 1 && fmt[1] == '}') {
 579                         // end of rich text
 580                         scanning_rich = false;
 581                         fmt = fmt.substr(2);
 582                         ret << from_ascii("!}");
 583                         continue;
 584                 }
 585                 else if (scanning_key)
 586                         key += char(thischar);
 587                 else {
 588                         try {
 589                                 ret.put(thischar);
 590                         } catch (EncodingException & /* e */) {
 591                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 592                         }
 593                 }
 594                 fmt = fmt.substr(1);
 595         } // for loop
 596         if (scanning_key) {
 597                 LYXERR0("Never found end of key in `" << format << "'!");
 598                 return _("ERROR!");
 599         }
 600         if (scanning_rich) {
 601                 LYXERR0("Never found end of rich text in `" << format << "'!");
 602                 return _("ERROR!");
 603         }
 604         return ret.str();
 605 }
 606
 607
 608 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref,
 609         Buffer const & buf, bool richtext) const
 610 {
 611         if (!richtext && !info_.empty())
 612                 return info_;
 613         if (richtext && !info_richtext_.empty())
 614                 return info_richtext_;
 615
 616         if (!is_bibtex_) {
 617                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 618                 info_ = it->second;
 619                 return info_;
 620         }
 621
 622         CiteEngineType const engine_type = buf.params().citeEngineType();
 623         DocumentClass const & dc = buf.params().documentClass();
 624         docstring const & format =
 625                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 626         int counter = 0;
 627         info_ = expandFormat(format, xref, counter, buf,
 628                 docstring(), docstring(), docstring(), false);
 629
 630         if (info_.empty()) {
 631                 // this probably shouldn't happen
 632                 return info_;
 633         }
 634
 635         if (richtext) {
 636                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 637                 return info_richtext_;
 638         }
 639
 640         info_ = convertLaTeXCommands(processRichtext(info_, false));
 641         return info_;
 642 }
 643
 644
 645 docstring const BibTeXInfo::getLabel(BibTeXInfo const * const xref,
 646         Buffer const & buf, docstring const & format, bool richtext,
 647         docstring const & before, docstring const & after,
 648         docstring const & dialog, bool next) const
 649 {
 650         docstring loclabel;
 651
 652         int counter = 0;
 653         loclabel = expandFormat(format, xref, counter, buf,
 654                 before, after, dialog, next);
 655
 656         if (!loclabel.empty() && !next) {
 657                 loclabel = processRichtext(loclabel, richtext);
 658                 loclabel = convertLaTeXCommands(loclabel);
 659         }
 660
 661         return loclabel;
 662 }
 663
 664
 665 docstring const & BibTeXInfo::operator[](docstring const & field) const
 666 {
 667         BibTeXInfo::const_iterator it = find(field);
 668         if (it != end())
 669                 return it->second;
 670         static docstring const empty_value = docstring();
 671         return empty_value;
 672 }
 673
 674
 675 docstring const & BibTeXInfo::operator[](string const & field) const
 676 {
 677         return operator[](from_ascii(field));
 678 }
 679
 680
 681 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 682         docstring const & before, docstring const & after, docstring const & dialog,
 683         BibTeXInfo const * const xref, size_t maxsize) const
 684 {
 685         // anything less is pointless
 686         LASSERT(maxsize >= 16, maxsize = 16);
 687         string key = oldkey;
 688         bool cleanit = false;
 689         if (prefixIs(oldkey, "clean:")) {
 690                 key = oldkey.substr(6);
 691                 cleanit = true;
 692         }
 693
 694         docstring ret = operator[](key);
 695         if (ret.empty() && xref)
 696                 ret = (*xref)[key];
 697         if (ret.empty()) {
 698                 // some special keys
 699                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 700                 if (key == "dialog")
 701                         ret = dialog;
 702                 else if (key == "entrytype")
 703                         ret = entry_type_;
 704                 else if (key == "key")
 705                         ret = bib_key_;
 706                 else if (key == "label")
 707                         ret = label_;
 708                 else if (key == "modifier" && modifier_ != 0)
 709                         ret = modifier_;
 710                 else if (key == "numericallabel")
 711                         ret = cite_number_;
 712                 else if (key == "abbrvauthor")
 713                         // Special key to provide abbreviated author names.
 714                         ret = getAbbreviatedAuthor(&buf, false);
 715                 else if (key == "shortauthor")
 716                         // When shortauthor is not defined, jurabib automatically
 717                         // provides jurabib-style abbreviated author names. We do
 718                         // this as well.
 719                         ret = getAbbreviatedAuthor(&buf, true);
 720                 else if (key == "shorttitle") {
 721                         // When shorttitle is not defined, jurabib uses for `article'
 722                         // and `periodical' entries the form `journal volume [year]'
 723                         // and for other types of entries it uses the `title' field.
 724                         if (entry_type_ == "article" || entry_type_ == "periodical")
 725                                 ret = operator[]("journal") + " " + operator[]("volume")
 726                                         + " [" + operator[]("year") + "]";
 727                         else
 728                                 ret = operator[]("title");
 729                 } else if (key == "bibentry") {
 730                         // Special key to provide the full bibliography entry: see getInfo()
 731                         CiteEngineType const engine_type = buf.params().citeEngineType();
 732                         DocumentClass const & dc = buf.params().documentClass();
 733                         docstring const & format =
 734                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 735                         int counter = 0;
 736                         ret = expandFormat(format, xref, counter, buf,
 737                                 docstring(), docstring(), docstring(), false);
 738                 } else if (key == "textbefore")
 739                         ret = before;
 740                 else if (key == "textafter")
 741                         ret = after;
 742                 else if (key == "year")
 743                         ret = getYear();
 744         }
 745
 746         if (cleanit)
 747                 ret = html::cleanAttr(ret);
 748
 749         // make sure it is not too big
 750         support::truncateWithEllipsis(ret, maxsize);
 751         return ret;
 752 }
 753
 754
 755 //////////////////////////////////////////////////////////////////////
 756 //
 757 // BiblioInfo
 758 //
 759 //////////////////////////////////////////////////////////////////////
 760
 761 namespace {
 762
 763 // A functor for use with sort, leading to case insensitive sorting
 764 class compareNoCase: public binary_function<docstring, docstring, bool>
 765 {
 766 public:
 767         bool operator()(docstring const & s1, docstring const & s2) const {
 768                 return compare_no_case(s1, s2) < 0;
 769         }
 770 };
 771
 772 } // namespace anon
 773
 774
 775 vector<docstring> const BiblioInfo::getKeys() const
 776 {
 777         vector<docstring> bibkeys;
 778         BiblioInfo::const_iterator it  = begin();
 779         for (; it != end(); ++it)
 780                 bibkeys.push_back(it->first);
 781         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 782         return bibkeys;
 783 }
 784
 785
 786 vector<docstring> const BiblioInfo::getFields() const
 787 {
 788         vector<docstring> bibfields;
 789         set<docstring>::const_iterator it = field_names_.begin();
 790         set<docstring>::const_iterator end = field_names_.end();
 791         for (; it != end; ++it)
 792                 bibfields.push_back(*it);
 793         sort(bibfields.begin(), bibfields.end());
 794         return bibfields;
 795 }
 796
 797
 798 vector<docstring> const BiblioInfo::getEntries() const
 799 {
 800         vector<docstring> bibentries;
 801         set<docstring>::const_iterator it = entry_types_.begin();
 802         set<docstring>::const_iterator end = entry_types_.end();
 803         for (; it != end; ++it)
 804                 bibentries.push_back(*it);
 805         sort(bibentries.begin(), bibentries.end());
 806         return bibentries;
 807 }
 808
 809
 810 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 811 {
 812         BiblioInfo::const_iterator it = find(key);
 813         if (it == end())
 814                 return docstring();
 815         BibTeXInfo const & data = it->second;
 816         return data.getAbbreviatedAuthor(&buf, false);
 817 }
 818
 819
 820 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 821 {
 822         BiblioInfo::const_iterator it = find(key);
 823         if (it == end())
 824                 return docstring();
 825         BibTeXInfo const & data = it->second;
 826         return data.citeNumber();
 827 }
 828
 829
 830 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 831 {
 832         BiblioInfo::const_iterator it = find(key);
 833         if (it == end())
 834                 return docstring();
 835         BibTeXInfo const & data = it->second;
 836         docstring year = data.getYear();
 837         if (year.empty()) {
 838                 // let's try the crossref
 839                 docstring const xref = data.getXRef();
 840                 if (xref.empty())
 841                         // no luck
 842                         return docstring();
 843                 BiblioInfo::const_iterator const xrefit = find(xref);
 844                 if (xrefit == end())
 845                         // no luck again
 846                         return docstring();
 847                 BibTeXInfo const & xref_data = xrefit->second;
 848                 year = xref_data.getYear();
 849         }
 850         if (use_modifier && data.modifier() != 0)
 851                 year += data.modifier();
 852         return year;
 853 }
 854
 855
 856 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 857 {
 858         docstring const year = getYear(key, use_modifier);
 859         if (year.empty())
 860                 return buf.B_("No year");
 861         return year;
 862 }
 863
 864
 865 docstring const BiblioInfo::getInfo(docstring const & key,
 866         Buffer const & buf, bool richtext) const
 867 {
 868         BiblioInfo::const_iterator it = find(key);
 869         if (it == end())
 870                 return docstring(_("Bibliography entry not found!"));
 871         BibTeXInfo const & data = it->second;
 872         BibTeXInfo const * xrefptr = 0;
 873         docstring const xref = data.getXRef();
 874         if (!xref.empty()) {
 875                 BiblioInfo::const_iterator const xrefit = find(xref);
 876                 if (xrefit != end())
 877                         xrefptr = &(xrefit->second);
 878         }
 879         return data.getInfo(xrefptr, buf, richtext);
 880 }
 881
 882
 883 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 884         Buffer const & buf, string const & style, bool for_xhtml,
 885         size_t max_size, docstring const & before, docstring const & after,
 886         docstring const & dialog) const
 887 {
 888         // shorter makes no sense
 889         LASSERT(max_size >= 16, max_size = 16);
 890
 891         // we can't display more than 10 of these, anyway
 892         bool const too_many_keys = keys.size() > 10;
 893         if (too_many_keys)
 894                 keys.resize(10);
 895
 896         CiteEngineType const engine_type = buf.params().citeEngineType();
 897         DocumentClass const & dc = buf.params().documentClass();
 898         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 899         docstring ret = format;
 900         vector<docstring>::const_iterator key = keys.begin();
 901         vector<docstring>::const_iterator ken = keys.end();
 902         for (; key != ken; ++key) {
 903                 BiblioInfo::const_iterator it = find(*key);
 904                 BibTeXInfo empty_data;
 905                 empty_data.key(*key);
 906                 BibTeXInfo & data = empty_data;
 907                 BibTeXInfo const * xrefptr = 0;
 908                 if (it != end()) {
 909                         data = it->second;
 910                         docstring const xref = data.getXRef();
 911                         if (!xref.empty()) {
 912                                 BiblioInfo::const_iterator const xrefit = find(xref);
 913                                 if (xrefit != end())
 914                                         xrefptr = &(xrefit->second);
 915                         }
 916                 }
 917                 ret = data.getLabel(xrefptr, buf, ret, for_xhtml,
 918                         before, after, dialog, key + 1 != ken);
 919         }
 920
 921         if (too_many_keys)
 922                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
 923         support::truncateWithEllipsis(ret, max_size);
 924         return ret;
 925 }
 926
 927
 928 bool BiblioInfo::isBibtex(docstring const & key) const
 929 {
 930         docstring key1;
 931         split(key, key1, ',');
 932         BiblioInfo::const_iterator it = find(key1);
 933         if (it == end())
 934                 return false;
 935         return it->second.isBibTeX();
 936 }
 937
 938
 939 vector<docstring> const BiblioInfo::getCiteStrings(
 940         vector<docstring> const & keys, vector<CitationStyle> const & styles,
 941         Buffer const & buf, docstring const & before,
 942         docstring const & after, docstring const & dialog, size_t max_size) const
 943 {
 944         if (empty())
 945                 return vector<docstring>();
 946
 947         string style;
 948         vector<docstring> vec(styles.size());
 949         for (size_t i = 0; i != vec.size(); ++i) {
 950                 style = styles[i].cmd;
 951                 vec[i] = getLabel(keys, buf, style, false, max_size, before, after, dialog);
 952         }
 953
 954         return vec;
 955 }
 956
 957
 958 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 959 {
 960         bimap_.insert(info.begin(), info.end());
 961         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
 962         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
 963 }
 964
 965
 966 namespace {
 967
 968 // used in xhtml to sort a list of BibTeXInfo objects
 969 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
 970 {
 971         docstring const lauth = lhs->getAbbreviatedAuthor();
 972         docstring const rauth = rhs->getAbbreviatedAuthor();
 973         docstring const lyear = lhs->getYear();
 974         docstring const ryear = rhs->getYear();
 975         docstring const ltitl = lhs->operator[]("title");
 976         docstring const rtitl = rhs->operator[]("title");
 977         return  (lauth < rauth)
 978                 || (lauth == rauth && lyear < ryear)
 979                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
 980 }
 981
 982 }
 983
 984
 985 void BiblioInfo::collectCitedEntries(Buffer const & buf)
 986 {
 987         cited_entries_.clear();
 988         // We are going to collect all the citation keys used in the document,
 989         // getting them from the TOC.
 990         // FIXME We may want to collect these differently, in the first case,
 991         // so that we might have them in order of appearance.
 992         set<docstring> citekeys;
 993         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
 994         Toc::const_iterator it = toc->begin();
 995         Toc::const_iterator const en = toc->end();
 996         for (; it != en; ++it) {
 997                 if (it->str().empty())
 998                         continue;
 999                 vector<docstring> const keys = getVectorFromString(it->str());
1000                 citekeys.insert(keys.begin(), keys.end());
1001         }
1002         if (citekeys.empty())
1003                 return;
1004
1005         // We have a set of the keys used in this document.
1006         // We will now convert it to a list of the BibTeXInfo objects used in
1007         // this document...
1008         vector<BibTeXInfo const *> bi;
1009         set<docstring>::const_iterator cit = citekeys.begin();
1010         set<docstring>::const_iterator const cen = citekeys.end();
1011         for (; cit != cen; ++cit) {
1012                 BiblioInfo::const_iterator const bt = find(*cit);
1013                 if (bt == end() || !bt->second.isBibTeX())
1014                         continue;
1015                 bi.push_back(&(bt->second));
1016         }
1017         // ...and sort it.
1018         sort(bi.begin(), bi.end(), lSorter);
1019
1020         // Now we can write the sorted keys
1021         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1022         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1023         for (; bit != ben; ++bit)
1024                 cited_entries_.push_back((*bit)->key());
1025 }
1026
1027
1028 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1029 {
1030         collectCitedEntries(buf);
1031         CiteEngineType const engine_type = buf.params().citeEngineType();
1032         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1033
1034         int keynumber = 0;
1035         char modifier = 0;
1036         // used to remember the last one we saw
1037         // we'll be comparing entries to see if we need to add
1038         // modifiers, like "1984a"
1039         map<docstring, BibTeXInfo>::iterator last;
1040
1041         vector<docstring>::const_iterator it = cited_entries_.begin();
1042         vector<docstring>::const_iterator const en = cited_entries_.end();
1043         for (; it != en; ++it) {
1044                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1045                 // this shouldn't happen, but...
1046                 if (biit == bimap_.end())
1047                         // ...fail gracefully, anyway.
1048                         continue;
1049                 BibTeXInfo & entry = biit->second;
1050                 if (numbers) {
1051                         docstring const num = convert<docstring>(++keynumber);
1052                         entry.setCiteNumber(num);
1053                 } else {
1054                         if (it != cited_entries_.begin()
1055                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1056                             // we access the year via getYear() so as to get it from the xref,
1057                             // if we need to do so
1058                             && getYear(entry.key()) == getYear(last->second.key())) {
1059                                 if (modifier == 0) {
1060                                         // so the last one should have been 'a'
1061                                         last->second.setModifier('a');
1062                                         modifier = 'b';
1063                                 } else if (modifier == 'z')
1064                                         modifier = 'A';
1065                                 else
1066                                         modifier++;
1067                         } else {
1068                                 modifier = 0;
1069                         }
1070                         entry.setModifier(modifier);
1071                         // remember the last one
1072                         last = biit;
1073                 }
1074         }
1075         // Set the labels
1076         it = cited_entries_.begin();
1077         for (; it != en; ++it) {
1078                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1079                 // this shouldn't happen, but...
1080                 if (biit == bimap_.end())
1081                         // ...fail gracefully, anyway.
1082                         continue;
1083                 BibTeXInfo & entry = biit->second;
1084                 if (numbers) {
1085                         entry.label(entry.citeNumber());
1086                 } else {
1087                         docstring const auth = entry.getAbbreviatedAuthor(&buf, false);
1088                         // we do it this way so as to access the xref, if necessary
1089                         // note that this also gives us the modifier
1090                         docstring const year = getYear(*it, buf, true);
1091                         if (!auth.empty() && !year.empty())
1092                                 entry.label(auth + ' ' + year);
1093                         else
1094                                 entry.label(entry.key());
1095                 }
1096         }
1097 }
1098
1099
1100 //////////////////////////////////////////////////////////////////////
1101 //
1102 // CitationStyle
1103 //
1104 //////////////////////////////////////////////////////////////////////
1105
1106
1107 CitationStyle citationStyleFromString(string const & command)
1108 {
1109         CitationStyle cs;
1110         if (command.empty())
1111                 return cs;
1112
1113         string cmd = command;
1114         if (cmd[0] == 'C') {
1115                 cs.forceUpperCase = true;
1116                 cmd[0] = 'c';
1117         }
1118
1119         size_t const n = cmd.size() - 1;
1120         if (cmd[n] == '*') {
1121                 cs.fullAuthorList = true;
1122                 cmd = cmd.substr(0, n);
1123         }
1124
1125         cs.cmd = cmd;
1126         return cs;
1127 }
1128
1129
1130 string citationStyleToString(const CitationStyle & cs)
1131 {
1132         string cmd = cs.cmd;
1133         if (cs.forceUpperCase)
1134                 cmd[0] = 'C';
1135         if (cs.fullAuthorList)
1136                 cmd += '*';
1137         return cmd;
1138 }
1139
1140 } // namespace lyx