src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  *
  11  * Full author contact details are available in file CREDITS.
  12  */
  13
  14 #include <config.h>
  15
  16 #include "BiblioInfo.h"
  17 #include "Buffer.h"
  18 #include "BufferParams.h"
  19 #include "buffer_funcs.h"
  20 #include "Encoding.h"
  21 #include "InsetIterator.h"
  22 #include "Language.h"
  23 #include "output_xhtml.h"
  24 #include "Paragraph.h"
  25 #include "TextClass.h"
  26 #include "TocBackend.h"
  27
  28 #include "support/convert.h"
  29 #include "support/debug.h"
  30 #include "support/docstream.h"
  31 #include "support/gettext.h"
  32 #include "support/lassert.h"
  33 #include "support/lstrings.h"
  34 #include "support/regex.h"
  35 #include "support/textutils.h"
  36
  37 #include <set>
  38
  39 using namespace std;
  40 using namespace lyx::support;
  41
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 // gets the "family name" from an author-type string
  48 docstring familyName(docstring const & name)
  49 {
  50         if (name.empty())
  51                 return docstring();
  52
  53         // first we look for a comma, and take the last name to be everything
  54         // preceding the right-most one, so that we also get the "jr" part.
  55         docstring::size_type idx = name.rfind(',');
  56         if (idx != docstring::npos)
  57                 return ltrim(name.substr(0, idx));
  58
  59         // OK, so now we want to look for the last name. We're going to
  60         // include the "von" part. This isn't perfect.
  61         // Split on spaces, to get various tokens.
  62         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  63         // If we only get two, assume the last one is the last name
  64         if (pieces.size() <= 2)
  65                 return pieces.back();
  66
  67         // Now we look for the first token that begins with a lower case letter.
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator en = pieces.end();
  70         for (; it != en; ++it) {
  71                 if ((*it).empty())
  72                         continue;
  73                 char_type const c = (*it)[0];
  74                 if (isLower(c))
  75                         break;
  76         }
  77
  78         if (it == en) // we never found a "von"
  79                 return pieces.back();
  80
  81         // reconstruct what we need to return
  82         docstring retval;
  83         bool first = true;
  84         for (; it != en; ++it) {
  85                 if (!first)
  86                         retval += " ";
  87                 else
  88                         first = false;
  89                 retval += *it;
  90         }
  91         return retval;
  92 }
  93
  94
  95 // converts a string containing LaTeX commands into unicode
  96 // for display.
  97 docstring convertLaTeXCommands(docstring const & str)
  98 {
  99         docstring val = str;
 100         docstring ret;
 101
 102         bool scanning_cmd = false;
 103         bool scanning_math = false;
 104         bool escaped = false; // used to catch \$, etc.
 105         while (!val.empty()) {
 106                 char_type const ch = val[0];
 107
 108                 // if we're scanning math, we output everything until we
 109                 // find an unescaped $, at which point we break out.
 110                 if (scanning_math) {
 111                         if (escaped)
 112                                 escaped = false;
 113                         else if (ch == '\\')
 114                                 escaped = true;
 115                         else if (ch == '$')
 116                                 scanning_math = false;
 117                         ret += ch;
 118                         val = val.substr(1);
 119                         continue;
 120                 }
 121
 122                 // if we're scanning a command name, then we just
 123                 // discard characters until we hit something that
 124                 // isn't alpha.
 125                 if (scanning_cmd) {
 126                         if (isAlphaASCII(ch)) {
 127                                 val = val.substr(1);
 128                                 escaped = false;
 129                                 continue;
 130                         }
 131                         // so we're done with this command.
 132                         // now we fall through and check this character.
 133                         scanning_cmd = false;
 134                 }
 135
 136                 // was the last character a \? If so, then this is something like:
 137                 // \\ or \$, so we'll just output it. That's probably not always right...
 138                 if (escaped) {
 139                         // exception: output \, as THIN SPACE
 140                         if (ch == ',')
 141                                 ret.push_back(0x2009);
 142                         else
 143                                 ret += ch;
 144                         val = val.substr(1);
 145                         escaped = false;
 146                         continue;
 147                 }
 148
 149                 if (ch == '$') {
 150                         ret += ch;
 151                         val = val.substr(1);
 152                         scanning_math = true;
 153                         continue;
 154                 }
 155
 156                 // we just ignore braces
 157                 if (ch == '{' || ch == '}') {
 158                         val = val.substr(1);
 159                         continue;
 160                 }
 161
 162                 // we're going to check things that look like commands, so if
 163                 // this doesn't, just output it.
 164                 if (ch != '\\') {
 165                         ret += ch;
 166                         val = val.substr(1);
 167                         continue;
 168                 }
 169
 170                 // ok, could be a command of some sort
 171                 // let's see if it corresponds to some unicode
 172                 // unicodesymbols has things in the form: \"{u},
 173                 // whereas we may see things like: \"u. So we'll
 174                 // look for that and change it, if necessary.
 175                 // FIXME: This is a sort of mini-tex2lyx.
 176                 //        Use the real tex2lyx instead!
 177                 static lyx::regex const reg("^\\\\\\W\\w");
 178                 if (lyx::regex_search(to_utf8(val), reg)) {
 179                         val.insert(3, from_ascii("}"));
 180                         val.insert(2, from_ascii("{"));
 181                 }
 182                 bool termination;
 183                 docstring rem;
 184                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 185                                 Encodings::TEXT_CMD, termination, rem);
 186                 if (!cnvtd.empty()) {
 187                         // it did, so we'll take that bit and proceed with what's left
 188                         ret += cnvtd;
 189                         val = rem;
 190                         continue;
 191                 }
 192                 // it's a command of some sort
 193                 scanning_cmd = true;
 194                 escaped = true;
 195                 val = val.substr(1);
 196         }
 197         return ret;
 198 }
 199
 200
 201 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 202 docstring processRichtext(docstring const & str, bool richtext)
 203 {
 204         docstring val = str;
 205         docstring ret;
 206
 207         bool scanning_rich = false;
 208         while (!val.empty()) {
 209                 char_type const ch = val[0];
 210                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 211                         // beginning of rich text
 212                         scanning_rich = true;
 213                         val = val.substr(2);
 214                         continue;
 215                 }
 216                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 217                         // end of rich text
 218                         scanning_rich = false;
 219                         val = val.substr(2);
 220                         continue;
 221                 }
 222                 if (richtext) {
 223                         if (scanning_rich)
 224                                 ret += ch;
 225                         else {
 226                                 // we need to escape '<' and '>'
 227                                 if (ch == '<')
 228                                         ret += "&lt;";
 229                                 else if (ch == '>')
 230                                         ret += "&gt;";
 231                                 else
 232                                         ret += ch;
 233                         }
 234                 } else if (!scanning_rich /* && !richtext */)
 235                         ret += ch;
 236                 // else the character is discarded, which will happen only if
 237                 // richtext == false and we are scanning rich text
 238                 val = val.substr(1);
 239         }
 240         return ret;
 241 }
 242
 243 } // anon namespace
 244
 245
 246 //////////////////////////////////////////////////////////////////////
 247 //
 248 // BibTeXInfo
 249 //
 250 //////////////////////////////////////////////////////////////////////
 251
 252 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 253         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 254           modifier_(0)
 255 {}
 256
 257
 258 docstring const BibTeXInfo::getAbbreviatedAuthor(bool jurabib_style) const
 259 {
 260         if (!is_bibtex_) {
 261                 docstring const opt = label();
 262                 if (opt.empty())
 263                         return docstring();
 264
 265                 docstring authors;
 266                 docstring const remainder = trim(split(opt, authors, '('));
 267                 if (remainder.empty())
 268                         // in this case, we didn't find a "(",
 269                         // so we don't have author (year)
 270                         return docstring();
 271                 return authors;
 272         }
 273
 274         docstring author = operator[]("author");
 275         if (author.empty()) {
 276                 author = operator[]("editor");
 277                 if (author.empty())
 278                         return author;
 279         }
 280
 281         // FIXME Move this to a separate routine that can
 282         // be called from elsewhere.
 283         //
 284         // OK, we've got some names. Let's format them.
 285         // Try to split the author list on " and "
 286         vector<docstring> const authors =
 287                 getVectorFromString(author, from_ascii(" and "));
 288
 289         if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
 290                 docstring shortauthor = familyName(authors[0])
 291                         + "/" + familyName(authors[1]);
 292                 if (authors.size() == 3)
 293                         shortauthor += "/" + familyName(authors[2]);
 294                 return convertLaTeXCommands(shortauthor);
 295         }
 296
 297         docstring retval = familyName(authors[0]);
 298
 299         if (authors.size() == 2 && authors[1] != "others")
 300                 retval = bformat(from_ascii("%1$s and %2$s"),
 301                         familyName(authors[0]), familyName(authors[1]));
 302
 303         if (authors.size() >= 2)
 304                 retval = bformat(from_ascii("%1$s et al."),
 305                         familyName(authors[0]));
 306
 307         return convertLaTeXCommands(retval);
 308 }
 309
 310
 311 docstring const BibTeXInfo::getAbbreviatedAuthor(Buffer const & buf, bool jurabib_style) const
 312 {
 313         docstring const author = getAbbreviatedAuthor(jurabib_style);
 314         if (!is_bibtex_)
 315                 return author;
 316         vector<docstring> const authors = getVectorFromString(author, from_ascii(" and "));
 317         if (authors.size() == 2)
 318                 return bformat(buf.B_("%1$s and %2$s"), authors[0], authors[1]);
 319         docstring::size_type const idx = author.rfind(from_ascii(" et al."));
 320         if (idx != docstring::npos)
 321                 return bformat(buf.B_("%1$s et al."), author.substr(0, idx));
 322         return author;
 323 }
 324
 325
 326 docstring const BibTeXInfo::getYear() const
 327 {
 328         if (is_bibtex_)
 329                 return operator[]("year");
 330
 331         docstring const opt = label();
 332         if (opt.empty())
 333                 return docstring();
 334
 335         docstring authors;
 336         docstring tmp = split(opt, authors, '(');
 337         if (tmp.empty())
 338                 // we don't have author (year)
 339                 return docstring();
 340         docstring year;
 341         tmp = split(tmp, year, ')');
 342         return year;
 343 }
 344
 345
 346 docstring const BibTeXInfo::getXRef() const
 347 {
 348         if (!is_bibtex_)
 349                 return docstring();
 350         return operator[]("crossref");
 351 }
 352
 353
 354 namespace {
 355
 356 docstring parseOptions(docstring const & format, string & optkey,
 357                     docstring & ifpart, docstring & elsepart);
 358
 359 // Calls parseOptions to deal with an embedded option, such as:
 360 //   {%number%[[, no.~%number%]]}
 361 // which must appear at the start of format. ifelsepart gets the
 362 // whole of the option, and we return what's left after the option.
 363 // we return format if there is an error.
 364 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 365 {
 366         LASSERT(format[0] == '{' && format[1] == '%', return format);
 367         string optkey;
 368         docstring ifpart;
 369         docstring elsepart;
 370         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 371         if (format == rest) { // parse error
 372                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 373                 return format;
 374         }
 375         LASSERT(rest.size() <= format.size(),
 376                 { ifelsepart = docstring(); return format; });
 377         ifelsepart = format.substr(0, format.size() - rest.size());
 378         return rest;
 379 }
 380
 381
 382 // Gets a "clause" from a format string, where the clause is
 383 // delimited by '[[' and ']]'. Returns what is left after the
 384 // clause is removed, and returns format if there is an error.
 385 docstring getClause(docstring const & format, docstring & clause)
 386 {
 387         docstring fmt = format;
 388         // remove '[['
 389         fmt = fmt.substr(2);
 390         // we'll remove characters from the front of fmt as we
 391         // deal with them
 392         while (!fmt.empty()) {
 393                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 394                         // that's the end
 395                         fmt = fmt.substr(2);
 396                         break;
 397                 }
 398                 // check for an embedded option
 399                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 400                         docstring part;
 401                         docstring const rest = parseEmbeddedOption(fmt, part);
 402                         if (fmt == rest) {
 403                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 404                                 return format;
 405                         }
 406                         clause += part;
 407                         fmt = rest;
 408                 } else { // it's just a normal character
 409                                 clause += fmt[0];
 410                                 fmt = fmt.substr(1);
 411                 }
 412         }
 413         return fmt;
 414 }
 415
 416
 417 // parse an options string, which must appear at the start of the
 418 // format parameter. puts the parsed bits in optkey, ifpart, and
 419 // elsepart and returns what's left after the option is removed.
 420 // if there's an error, it returns format itself.
 421 docstring parseOptions(docstring const & format, string & optkey,
 422                     docstring & ifpart, docstring & elsepart)
 423 {
 424         LASSERT(format[0] == '{' && format[1] == '%', return format);
 425         // strip '{%'
 426         docstring fmt = format.substr(2);
 427         size_t pos = fmt.find('%'); // end of key
 428         if (pos == string::npos) {
 429                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 430                 return format;
 431         }
 432         optkey = to_utf8(fmt.substr(0, pos));
 433         fmt = fmt.substr(pos + 1);
 434         // [[format]] should be next
 435         if (fmt[0] != '[' || fmt[1] != '[') {
 436                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 437                 return format;
 438         }
 439
 440         docstring curfmt = fmt;
 441         fmt = getClause(curfmt, ifpart);
 442         if (fmt == curfmt) {
 443                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 444                 return format;
 445         }
 446
 447         if (fmt[0] == '}') // we're done, no else clause
 448                 return fmt.substr(1);
 449
 450         // else part should follow
 451         if (fmt[0] != '[' || fmt[1] != '[') {
 452                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 453                 return format;
 454         }
 455
 456         curfmt = fmt;
 457         fmt = getClause(curfmt, elsepart);
 458         // we should be done
 459         if (fmt == curfmt || fmt[0] != '}') {
 460                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 461                 return format;
 462         }
 463         return fmt.substr(1);
 464 }
 465
 466
 467 } // anon namespace
 468
 469 /* FIXME
 470 Bug #9131 revealed an oddity in how we are generating citation information
 471 when more than one key is given. We end up building a longer and longer format
 472 string as we go, which we then have to re-parse, over and over and over again,
 473 rather than generating the information for the individual keys and then putting
 474 all of that together. We do that to deal with the way separators work, from what
 475 I can tell, but it still feels like a hack. Fixing this would require quite a
 476 bit of work, however.
 477 */
 478 docstring BibTeXInfo::expandFormat(docstring const & format,
 479                 BibTeXInfo const * const xref, int & counter, Buffer const & buf,
 480                 docstring before, docstring after, docstring dialog, bool next) const
 481 {
 482         // incorrect use of macros could put us in an infinite loop
 483         static int const max_passes = 5000;
 484         // the use of overly large keys can lead to performance problems, due
 485         // to eventual attempts to convert LaTeX macros to unicode. See bug
 486         // #8944. This is perhaps not the best solution, but it will have to
 487         // do for now.
 488         static size_t const max_keysize = 128;
 489         odocstringstream ret; // return value
 490         string key;
 491         bool scanning_key = false;
 492         bool scanning_rich = false;
 493
 494         CiteEngineType const engine_type = buf.params().citeEngineType();
 495         docstring fmt = format;
 496         // we'll remove characters from the front of fmt as we
 497         // deal with them
 498         while (!fmt.empty()) {
 499                 if (counter > max_passes) {
 500                         LYXERR0("Recursion limit reached while parsing `"
 501                                 << format << "'.");
 502                         return _("ERROR!");
 503                 }
 504
 505                 char_type thischar = fmt[0];
 506                 if (thischar == '%') {
 507                         // beginning or end of key
 508                         if (scanning_key) {
 509                                 // end of key
 510                                 scanning_key = false;
 511                                 // so we replace the key with its value, which may be empty
 512                                 if (key[0] == '!') {
 513                                         // macro
 514                                         string const val =
 515                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 516                                         fmt = from_utf8(val) + fmt.substr(1);
 517                                         counter += 1;
 518                                         continue;
 519                                 } else if (key[0] == '_') {
 520                                         // a translatable bit
 521                                         string const val =
 522                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 523                                         docstring const trans =
 524                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 525                                         ret << trans;
 526                                 } else {
 527                                         docstring const val =
 528                                                 getValueForKey(key, buf, before, after, dialog, xref, max_keysize);
 529                                         if (!scanning_rich)
 530                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 531                                         ret << val;
 532                                         if (!scanning_rich)
 533                                                 ret << from_ascii("{!</span>!}");
 534                                 }
 535                         } else {
 536                                 // beginning of key
 537                                 key.clear();
 538                                 scanning_key = true;
 539                         }
 540                 }
 541                 else if (thischar == '{') {
 542                         // beginning of option?
 543                         if (scanning_key) {
 544                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 545                                 return _("ERROR!");
 546                         }
 547                         if (fmt.size() > 1) {
 548                                 if (fmt[1] == '%') {
 549                                         // it is the beginning of an optional format
 550                                         string optkey;
 551                                         docstring ifpart;
 552                                         docstring elsepart;
 553                                         docstring const newfmt =
 554                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 555                                         if (newfmt == fmt) // parse error
 556                                                 return _("ERROR!");
 557                                         fmt = newfmt;
 558                                         docstring const val =
 559                                                 getValueForKey(optkey, buf, before, after, dialog, xref);
 560                                         if (optkey == "next" && next)
 561                                                 ret << ifpart; // without expansion
 562                                         else if (!val.empty()) {
 563                                                 int newcounter = 0;
 564                                                 ret << expandFormat(ifpart, xref, newcounter, buf,
 565                                                         before, after, dialog, next);
 566                                         } else if (!elsepart.empty()) {
 567                                                 int newcounter = 0;
 568                                                 ret << expandFormat(elsepart, xref, newcounter, buf,
 569                                                         before, after, dialog, next);
 570                                         }
 571                                         // fmt will have been shortened for us already
 572                                         continue;
 573                                 }
 574                                 if (fmt[1] == '!') {
 575                                         // beginning of rich text
 576                                         scanning_rich = true;
 577                                         fmt = fmt.substr(2);
 578                                         ret << from_ascii("{!");
 579                                         continue;
 580                                 }
 581                         }
 582                         // we are here if '{' was not followed by % or !.
 583                         // So it's just a character.
 584                         ret << thischar;
 585                 }
 586                 else if (scanning_rich && thischar == '!'
 587                          && fmt.size() > 1 && fmt[1] == '}') {
 588                         // end of rich text
 589                         scanning_rich = false;
 590                         fmt = fmt.substr(2);
 591                         ret << from_ascii("!}");
 592                         continue;
 593                 }
 594                 else if (scanning_key)
 595                         key += char(thischar);
 596                 else {
 597                         try {
 598                                 ret.put(thischar);
 599                         } catch (EncodingException & /* e */) {
 600                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 601                         }
 602                 }
 603                 fmt = fmt.substr(1);
 604         } // for loop
 605         if (scanning_key) {
 606                 LYXERR0("Never found end of key in `" << format << "'!");
 607                 return _("ERROR!");
 608         }
 609         if (scanning_rich) {
 610                 LYXERR0("Never found end of rich text in `" << format << "'!");
 611                 return _("ERROR!");
 612         }
 613         return ret.str();
 614 }
 615
 616
 617 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref,
 618         Buffer const & buf, bool richtext) const
 619 {
 620         if (!richtext && !info_.empty())
 621                 return info_;
 622         if (richtext && !info_richtext_.empty())
 623                 return info_richtext_;
 624
 625         if (!is_bibtex_) {
 626                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 627                 info_ = it->second;
 628                 return info_;
 629         }
 630
 631         CiteEngineType const engine_type = buf.params().citeEngineType();
 632         DocumentClass const & dc = buf.params().documentClass();
 633         docstring const & format =
 634                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 635         int counter = 0;
 636         info_ = expandFormat(format, xref, counter, buf,
 637                 docstring(), docstring(), docstring(), false);
 638
 639         if (info_.empty()) {
 640                 // this probably shouldn't happen
 641                 return info_;
 642         }
 643
 644         if (richtext) {
 645                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 646                 return info_richtext_;
 647         }
 648
 649         info_ = convertLaTeXCommands(processRichtext(info_, false));
 650         return info_;
 651 }
 652
 653
 654 docstring const BibTeXInfo::getLabel(BibTeXInfo const * const xref,
 655         Buffer const & buf, docstring const & format, bool richtext,
 656         docstring const & before, docstring const & after,
 657         docstring const & dialog, bool next) const
 658 {
 659         docstring loclabel;
 660
 661         int counter = 0;
 662         loclabel = expandFormat(format, xref, counter, buf,
 663                 before, after, dialog, next);
 664
 665         if (!loclabel.empty() && !next) {
 666                 loclabel = processRichtext(loclabel, richtext);
 667                 loclabel = convertLaTeXCommands(loclabel);
 668         }
 669
 670         return loclabel;
 671 }
 672
 673
 674 docstring const & BibTeXInfo::operator[](docstring const & field) const
 675 {
 676         BibTeXInfo::const_iterator it = find(field);
 677         if (it != end())
 678                 return it->second;
 679         static docstring const empty_value = docstring();
 680         return empty_value;
 681 }
 682
 683
 684 docstring const & BibTeXInfo::operator[](string const & field) const
 685 {
 686         return operator[](from_ascii(field));
 687 }
 688
 689
 690 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 691         docstring const & before, docstring const & after, docstring const & dialog,
 692         BibTeXInfo const * const xref, size_t maxsize) const
 693 {
 694         // anything less is pointless
 695         LASSERT(maxsize >= 16, maxsize = 16);
 696         string key = oldkey;
 697         bool cleanit = false;
 698         if (prefixIs(oldkey, "clean:")) {
 699                 key = oldkey.substr(6);
 700                 cleanit = true;
 701         }
 702
 703         docstring ret = operator[](key);
 704         if (ret.empty() && xref)
 705                 ret = (*xref)[key];
 706         if (ret.empty()) {
 707                 // some special keys
 708                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 709                 if (key == "dialog")
 710                         ret = dialog;
 711                 else if (key == "entrytype")
 712                         ret = entry_type_;
 713                 else if (key == "key")
 714                         ret = bib_key_;
 715                 else if (key == "label")
 716                         ret = label_;
 717                 else if (key == "modifier" && modifier_ != 0)
 718                         ret = modifier_;
 719                 else if (key == "numericallabel")
 720                         ret = cite_number_;
 721                 else if (key == "abbrvauthor")
 722                         // Special key to provide abbreviated author names.
 723                         ret = getAbbreviatedAuthor(buf, false);
 724                 else if (key == "shortauthor")
 725                         // When shortauthor is not defined, jurabib automatically
 726                         // provides jurabib-style abbreviated author names. We do
 727                         // this as well.
 728                         ret = getAbbreviatedAuthor(buf, true);
 729                 else if (key == "shorttitle") {
 730                         // When shorttitle is not defined, jurabib uses for `article'
 731                         // and `periodical' entries the form `journal volume [year]'
 732                         // and for other types of entries it uses the `title' field.
 733                         if (entry_type_ == "article" || entry_type_ == "periodical")
 734                                 ret = operator[]("journal") + " " + operator[]("volume")
 735                                         + " [" + operator[]("year") + "]";
 736                         else
 737                                 ret = operator[]("title");
 738                 } else if (key == "bibentry") {
 739                         // Special key to provide the full bibliography entry: see getInfo()
 740                         CiteEngineType const engine_type = buf.params().citeEngineType();
 741                         DocumentClass const & dc = buf.params().documentClass();
 742                         docstring const & format =
 743                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 744                         int counter = 0;
 745                         ret = expandFormat(format, xref, counter, buf,
 746                                 docstring(), docstring(), docstring(), false);
 747                 } else if (key == "textbefore")
 748                         ret = before;
 749                 else if (key == "textafter")
 750                         ret = after;
 751                 else if (key == "year")
 752                         ret = getYear();
 753         }
 754
 755         if (cleanit)
 756                 ret = html::cleanAttr(ret);
 757
 758         // make sure it is not too big
 759         if (ret.size() > maxsize)
 760                 ret = ret.substr(0, maxsize - 3) + from_ascii("...");
 761         return ret;
 762 }
 763
 764
 765 //////////////////////////////////////////////////////////////////////
 766 //
 767 // BiblioInfo
 768 //
 769 //////////////////////////////////////////////////////////////////////
 770
 771 namespace {
 772
 773 // A functor for use with sort, leading to case insensitive sorting
 774 class compareNoCase: public binary_function<docstring, docstring, bool>
 775 {
 776 public:
 777         bool operator()(docstring const & s1, docstring const & s2) const {
 778                 return compare_no_case(s1, s2) < 0;
 779         }
 780 };
 781
 782 } // namespace anon
 783
 784
 785 vector<docstring> const BiblioInfo::getKeys() const
 786 {
 787         vector<docstring> bibkeys;
 788         BiblioInfo::const_iterator it  = begin();
 789         for (; it != end(); ++it)
 790                 bibkeys.push_back(it->first);
 791         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 792         return bibkeys;
 793 }
 794
 795
 796 vector<docstring> const BiblioInfo::getFields() const
 797 {
 798         vector<docstring> bibfields;
 799         set<docstring>::const_iterator it = field_names_.begin();
 800         set<docstring>::const_iterator end = field_names_.end();
 801         for (; it != end; ++it)
 802                 bibfields.push_back(*it);
 803         sort(bibfields.begin(), bibfields.end());
 804         return bibfields;
 805 }
 806
 807
 808 vector<docstring> const BiblioInfo::getEntries() const
 809 {
 810         vector<docstring> bibentries;
 811         set<docstring>::const_iterator it = entry_types_.begin();
 812         set<docstring>::const_iterator end = entry_types_.end();
 813         for (; it != end; ++it)
 814                 bibentries.push_back(*it);
 815         sort(bibentries.begin(), bibentries.end());
 816         return bibentries;
 817 }
 818
 819
 820 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
 821 {
 822         BiblioInfo::const_iterator it = find(key);
 823         if (it == end())
 824                 return docstring();
 825         BibTeXInfo const & data = it->second;
 826         return data.getAbbreviatedAuthor(buf, false);
 827 }
 828
 829
 830 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
 831 {
 832         BiblioInfo::const_iterator it = find(key);
 833         if (it == end())
 834                 return docstring();
 835         BibTeXInfo const & data = it->second;
 836         return data.citeNumber();
 837 }
 838
 839
 840 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 841 {
 842         BiblioInfo::const_iterator it = find(key);
 843         if (it == end())
 844                 return docstring();
 845         BibTeXInfo const & data = it->second;
 846         docstring year = data.getYear();
 847         if (year.empty()) {
 848                 // let's try the crossref
 849                 docstring const xref = data.getXRef();
 850                 if (xref.empty())
 851                         // no luck
 852                         return docstring();
 853                 BiblioInfo::const_iterator const xrefit = find(xref);
 854                 if (xrefit == end())
 855                         // no luck again
 856                         return docstring();
 857                 BibTeXInfo const & xref_data = xrefit->second;
 858                 year = xref_data.getYear();
 859         }
 860         if (use_modifier && data.modifier() != 0)
 861                 year += data.modifier();
 862         return year;
 863 }
 864
 865
 866 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
 867 {
 868         docstring const year = getYear(key, use_modifier);
 869         if (year.empty())
 870                 return buf.B_("No year");
 871         return year;
 872 }
 873
 874
 875 docstring const BiblioInfo::getInfo(docstring const & key,
 876         Buffer const & buf, bool richtext) const
 877 {
 878         BiblioInfo::const_iterator it = find(key);
 879         if (it == end())
 880                 return docstring(_("Bibliography entry not found!"));
 881         BibTeXInfo const & data = it->second;
 882         BibTeXInfo const * xrefptr = 0;
 883         docstring const xref = data.getXRef();
 884         if (!xref.empty()) {
 885                 BiblioInfo::const_iterator const xrefit = find(xref);
 886                 if (xrefit != end())
 887                         xrefptr = &(xrefit->second);
 888         }
 889         return data.getInfo(xrefptr, buf, richtext);
 890 }
 891
 892
 893 docstring const BiblioInfo::getLabel(vector<docstring> keys,
 894         Buffer const & buf, string const & style, bool for_xhtml,
 895         size_t max_size, docstring const & before, docstring const & after,
 896         docstring const & dialog) const
 897 {
 898         // shorter makes no sense
 899         LASSERT(max_size >= 16, max_size = 16);
 900
 901         // we can't display more than 10 of these, anyway
 902         bool const too_many_keys = keys.size() > 10;
 903         if (too_many_keys)
 904                 keys.resize(10);
 905
 906         CiteEngineType const engine_type = buf.params().citeEngineType();
 907         DocumentClass const & dc = buf.params().documentClass();
 908         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
 909         docstring ret = format;
 910         vector<docstring>::const_iterator key = keys.begin();
 911         vector<docstring>::const_iterator ken = keys.end();
 912         for (; key != ken; ++key) {
 913                 BiblioInfo::const_iterator it = find(*key);
 914                 BibTeXInfo empty_data;
 915                 empty_data.key(*key);
 916                 BibTeXInfo & data = empty_data;
 917                 BibTeXInfo const * xrefptr = 0;
 918                 if (it != end()) {
 919                         data = it->second;
 920                         docstring const xref = data.getXRef();
 921                         if (!xref.empty()) {
 922                                 BiblioInfo::const_iterator const xrefit = find(xref);
 923                                 if (xrefit != end())
 924                                         xrefptr = &(xrefit->second);
 925                         }
 926                 }
 927                 ret = data.getLabel(xrefptr, buf, ret, for_xhtml,
 928                         before, after, dialog, key + 1 != ken);
 929         }
 930
 931         if (ret.size() > max_size) {
 932                 ret.resize(max_size - 3);
 933                 ret += "...";
 934         } else if (too_many_keys) {
 935                 if (ret.size() > max_size - 3)
 936                         ret.resize(max_size - 3);
 937                 ret += "...";
 938         }
 939         return ret;
 940 }
 941
 942
 943 bool BiblioInfo::isBibtex(docstring const & key) const
 944 {
 945         BiblioInfo::const_iterator it = find(key);
 946         if (it == end())
 947                 return false;
 948         return it->second.isBibTeX();
 949 }
 950
 951
 952 vector<docstring> const BiblioInfo::getCiteStrings(
 953         vector<docstring> const & keys, vector<CitationStyle> const & styles,
 954         Buffer const & buf, docstring const & before,
 955         docstring const & after, docstring const & dialog, size_t max_size) const
 956 {
 957         if (empty())
 958                 return vector<docstring>();
 959
 960         string style;
 961         vector<docstring> vec(styles.size());
 962         for (size_t i = 0; i != vec.size(); ++i) {
 963                 style = styles[i].cmd;
 964                 vec[i] = getLabel(keys, buf, style, false, max_size, before, after, dialog);
 965         }
 966
 967         return vec;
 968 }
 969
 970
 971 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 972 {
 973         bimap_.insert(info.begin(), info.end());
 974         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
 975         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
 976 }
 977
 978
 979 namespace {
 980
 981 // used in xhtml to sort a list of BibTeXInfo objects
 982 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
 983 {
 984         docstring const lauth = lhs->getAbbreviatedAuthor();
 985         docstring const rauth = rhs->getAbbreviatedAuthor();
 986         docstring const lyear = lhs->getYear();
 987         docstring const ryear = rhs->getYear();
 988         docstring const ltitl = lhs->operator[]("title");
 989         docstring const rtitl = rhs->operator[]("title");
 990         return  (lauth < rauth)
 991                 || (lauth == rauth && lyear < ryear)
 992                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
 993 }
 994
 995 }
 996
 997
 998 void BiblioInfo::collectCitedEntries(Buffer const & buf)
 999 {
1000         cited_entries_.clear();
1001         // We are going to collect all the citation keys used in the document,
1002         // getting them from the TOC.
1003         // FIXME We may want to collect these differently, in the first case,
1004         // so that we might have them in order of appearance.
1005         set<docstring> citekeys;
1006         Toc const & toc = buf.tocBackend().toc("citation");
1007         Toc::const_iterator it = toc.begin();
1008         Toc::const_iterator const en = toc.end();
1009         for (; it != en; ++it) {
1010                 if (it->str().empty())
1011                         continue;
1012                 vector<docstring> const keys = getVectorFromString(it->str());
1013                 citekeys.insert(keys.begin(), keys.end());
1014         }
1015         if (citekeys.empty())
1016                 return;
1017
1018         // We have a set of the keys used in this document.
1019         // We will now convert it to a list of the BibTeXInfo objects used in
1020         // this document...
1021         vector<BibTeXInfo const *> bi;
1022         set<docstring>::const_iterator cit = citekeys.begin();
1023         set<docstring>::const_iterator const cen = citekeys.end();
1024         for (; cit != cen; ++cit) {
1025                 BiblioInfo::const_iterator const bt = find(*cit);
1026                 if (bt == end() || !bt->second.isBibTeX())
1027                         continue;
1028                 bi.push_back(&(bt->second));
1029         }
1030         // ...and sort it.
1031         sort(bi.begin(), bi.end(), lSorter);
1032
1033         // Now we can write the sorted keys
1034         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1035         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1036         for (; bit != ben; ++bit)
1037                 cited_entries_.push_back((*bit)->key());
1038 }
1039
1040
1041 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1042 {
1043         collectCitedEntries(buf);
1044         CiteEngineType const engine_type = buf.params().citeEngineType();
1045         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1046
1047         int keynumber = 0;
1048         char modifier = 0;
1049         // used to remember the last one we saw
1050         // we'll be comparing entries to see if we need to add
1051         // modifiers, like "1984a"
1052         map<docstring, BibTeXInfo>::iterator last;
1053
1054         vector<docstring>::const_iterator it = cited_entries_.begin();
1055         vector<docstring>::const_iterator const en = cited_entries_.end();
1056         for (; it != en; ++it) {
1057                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1058                 // this shouldn't happen, but...
1059                 if (biit == bimap_.end())
1060                         // ...fail gracefully, anyway.
1061                         continue;
1062                 BibTeXInfo & entry = biit->second;
1063                 if (numbers) {
1064                         docstring const num = convert<docstring>(++keynumber);
1065                         entry.setCiteNumber(num);
1066                 } else {
1067                         if (it != cited_entries_.begin()
1068                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
1069                             // we access the year via getYear() so as to get it from the xref,
1070                             // if we need to do so
1071                             && getYear(entry.key()) == getYear(last->second.key())) {
1072                                 if (modifier == 0) {
1073                                         // so the last one should have been 'a'
1074                                         last->second.setModifier('a');
1075                                         modifier = 'b';
1076                                 } else if (modifier == 'z')
1077                                         modifier = 'A';
1078                                 else
1079                                         modifier++;
1080                         } else {
1081                                 modifier = 0;
1082                         }
1083                         entry.setModifier(modifier);
1084                         // remember the last one
1085                         last = biit;
1086                 }
1087         }
1088         // Set the labels
1089         it = cited_entries_.begin();
1090         for (; it != en; ++it) {
1091                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1092                 // this shouldn't happen, but...
1093                 if (biit == bimap_.end())
1094                         // ...fail gracefully, anyway.
1095                         continue;
1096                 BibTeXInfo & entry = biit->second;
1097                 if (numbers) {
1098                         entry.label(entry.citeNumber());
1099                 } else {
1100                         docstring const auth = entry.getAbbreviatedAuthor(buf, false);
1101                         // we do it this way so as to access the xref, if necessary
1102                         // note that this also gives us the modifier
1103                         docstring const year = getYear(*it, buf, true);
1104                         if (!auth.empty() && !year.empty())
1105                                 entry.label(auth + ' ' + year);
1106                         else
1107                                 entry.label(entry.key());
1108                 }
1109         }
1110 }
1111
1112
1113 //////////////////////////////////////////////////////////////////////
1114 //
1115 // CitationStyle
1116 //
1117 //////////////////////////////////////////////////////////////////////
1118
1119
1120 CitationStyle citationStyleFromString(string const & command)
1121 {
1122         CitationStyle cs;
1123         if (command.empty())
1124                 return cs;
1125
1126         string cmd = command;
1127         if (cmd[0] == 'C') {
1128                 cs.forceUpperCase = true;
1129                 cmd[0] = 'c';
1130         }
1131
1132         size_t const n = cmd.size() - 1;
1133         if (cmd[n] == '*') {
1134                 cs.fullAuthorList = true;
1135                 cmd = cmd.substr(0, n);
1136         }
1137
1138         cs.cmd = cmd;
1139         return cs;
1140 }
1141
1142
1143 string citationStyleToString(const CitationStyle & cs)
1144 {
1145         string cmd = cs.cmd;
1146         if (cs.forceUpperCase)
1147                 cmd[0] = 'C';
1148         if (cs.fullAuthorList)
1149                 cmd += '*';
1150         return cmd;
1151 }
1152
1153 } // namespace lyx