src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  * \author Jürgen Spitzmüller
  11  *
  12  * Full author contact details are available in file CREDITS.
  13  */
  14
  15 #include <config.h>
  16
  17 #include "BiblioInfo.h"
  18 #include "Buffer.h"
  19 #include "BufferParams.h"
  20 #include "buffer_funcs.h"
  21 #include "Citation.h"
  22 #include "Encoding.h"
  23 #include "InsetIterator.h"
  24 #include "Language.h"
  25 #include "output_xhtml.h"
  26 #include "Paragraph.h"
  27 #include "TextClass.h"
  28 #include "TocBackend.h"
  29
  30 #include "support/convert.h"
  31 #include "support/debug.h"
  32 #include "support/docstream.h"
  33 #include "support/gettext.h"
  34 #include "support/lassert.h"
  35 #include "support/lstrings.h"
  36 #include "support/regex.h"
  37 #include "support/textutils.h"
  38
  39 #include <map>
  40 #include <set>
  41
  42 using namespace std;
  43 using namespace lyx::support;
  44
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 // Remove placeholders from names
  51 docstring renormalize(docstring const & input)
  52 {
  53         docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
  54         return subst(res, from_ascii("$$comma!"), from_ascii(","));
  55 }
  56
  57
// The components of a single personal name, as filled in by nameParts().
// Components that nameParts() does not find are left empty.
struct name_parts {
        // family name; includes a lower-case "von" part if nameParts() detects one
        docstring surname;
        // given name(s)
        docstring prename;
        // the "jr" part; only set for the three-piece comma-separated form
        docstring suffix;
};
  63
  64
  65 // gets the "prename" and "family name" from an author-type string
  66 name_parts nameParts(docstring const & iname)
  67 {
  68         name_parts res;
  69         if (iname.empty())
  70                 return res;
  71
  72         // First we check for goupings (via {...}) and replace blanks and
  73         // commas inside groups with temporary placeholders
  74         docstring name;
  75         int gl = 0;
  76         docstring::const_iterator p = iname.begin();
  77         while (p != iname.end()) {
  78                 // count grouping level
  79                 if (*p == '{')
  80                         ++gl;
  81                 else if (*p == '}')
  82                         --gl;
  83                 // generate string with probable placeholders
  84                 if (*p == ' ' && gl > 0)
  85                         name += from_ascii("$$space!");
  86                 else if (*p == ',' && gl > 0)
  87                         name += from_ascii("$$comma!");
  88                 else
  89                         name += *p;
  90                 ++p;
  91         }
  92
  93         // Now we look for a comma, and take the last name to be everything
  94         // preceding the right-most one, so that we also get the "jr" part.
  95         vector<docstring> pieces = getVectorFromString(name);
  96         if (pieces.size() > 1) {
  97                 // whether we have a jr. part or not, it's always
  98                 // the first and last item (reversed)
  99                 res.surname = renormalize(pieces.front());
 100                 res.prename = renormalize(pieces.back());
 101                 // If we have three pieces (the maximum allowed by BibTeX),
 102                 // the second one is the jr part.
 103                 if (pieces.size() > 2)
 104                         res.suffix = renormalize(pieces.at(1));
 105                 return res;
 106         }
 107
 108         // OK, so now we want to look for the last name. We're going to
 109         // include the "von" part. This isn't perfect.
 110         // Split on spaces, to get various tokens.
 111         pieces = getVectorFromString(name, from_ascii(" "));
 112         // No space: Only a family name given
 113         if (pieces.size() < 2) {
 114                 res.surname = renormalize(pieces.back());
 115                 return res;
 116         }
 117         // If we get two pieces, assume the last one is the last name
 118         if (pieces.size() == 2) {
 119                 res.surname = renormalize(pieces.back());
 120                 res.prename = renormalize(pieces.front());
 121                 return res;
 122         }
 123
 124         // More than 3 pieces: Now we look for the first piece that
 125         // begins with a lower case letter (the "von-part").
 126         docstring prename;
 127         vector<docstring>::const_iterator it = pieces.begin();
 128         vector<docstring>::const_iterator const en = pieces.end();
 129         bool first = true;
 130         for (; it != en; ++it) {
 131                 if ((*it).empty())
 132                         continue;
 133                 char_type const c = (*it)[0];
 134                 // If the piece starts with a lower case char, we assume
 135                 // this is the "von-part" (family name prefix) and thus part
 136                 // of the family name.
 137                 if (isLower(c))
 138                         break;
 139                 // If this is the last piece, then what we now have is
 140                 // the family name.
 141                 if (it + 1 == en)
 142                         break;
 143                 // Nothing of the former, so add this piece to the prename
 144                 if (!first)
 145                         prename += " ";
 146                 else
 147                         first = false;
 148                 prename += *it;
 149         }
 150
 151         // Reconstruct the family name.
 152         // Note that if we left the loop with because it + 1 == en,
 153         // then this will still do the right thing, i.e., make surname
 154         // just be the last piece.
 155         docstring surname;
 156         first = true;
 157         for (; it != en; ++it) {
 158                 if (!first)
 159                         surname += " ";
 160                 else
 161                         first = false;
 162                 surname += *it;
 163         }
 164         res.surname = renormalize(surname);
 165         res.prename = renormalize(prename);
 166         return res;
 167 }
 168
 169
 170 docstring constructName(docstring const & name, string const scheme)
 171 {
 172         // re-constructs a name from name parts according
 173         // to a given scheme
 174         docstring const prename = nameParts(name).prename;
 175         docstring const surname = nameParts(name).surname;
 176         docstring const suffix = nameParts(name).suffix;
 177         string res = scheme;
 178         static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 179         static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 180         smatch sub;
 181         if (regex_match(scheme, sub, reg1)) {
 182                 res = sub.str(1);
 183                 if (!prename.empty())
 184                         res += sub.str(3);
 185                 res += sub.str(5);
 186         }
 187         if (regex_match(res, sub, reg2)) {
 188                 res = sub.str(1);
 189                 if (!suffix.empty())
 190                         res += sub.str(3);
 191                 res += sub.str(5);
 192         }
 193         docstring result = from_ascii(res);
 194         result = subst(result, from_ascii("%prename%"), prename);
 195         result = subst(result, from_ascii("%surname%"), surname);
 196         result = subst(result, from_ascii("%suffix%"), suffix);
 197         return result;
 198 }
 199
 200
 201 vector<docstring> const getAuthors(docstring const & author)
 202 {
 203         // We check for goupings (via {...}) and only consider " and "
 204         // outside groups as author separator. This is to account
 205         // for cases such as {{Barnes and Noble, Inc.}}, which
 206         // need to be treated as one single family name.
 207         // We use temporary placeholders in order to differentiate the
 208         // diverse " and " cases.
 209
 210         // First, we temporarily replace all ampersands. It is rather unusual
 211         // in author names, but can happen (consider cases such as "C \& A Corp.").
 212         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
 213         // Then, we temporarily make all " and " strings to ampersands in order
 214         // to handle them later on a per-char level.
 215         iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
 216         // Now we traverse through the string and replace the "&" by the proper
 217         // output in- and outside groups
 218         docstring name;
 219         int gl = 0;
 220         docstring::const_iterator p = iname.begin();
 221         while (p != iname.end()) {
 222                 // count grouping level
 223                 if (*p == '{')
 224                         ++gl;
 225                 else if (*p == '}')
 226                         --gl;
 227                 // generate string with probable placeholders
 228                 if (*p == '&') {
 229                         if (gl > 0)
 230                                 // Inside groups, we output "and"
 231                                 name += from_ascii("and");
 232                         else
 233                                 // Outside groups, we output a separator
 234                                 name += from_ascii("$$namesep!");
 235                 }
 236                 else
 237                         name += *p;
 238                 ++p;
 239         }
 240
 241         // re-insert the literal ampersands
 242         name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
 243
 244         // Now construct the actual vector
 245         return getVectorFromString(name, from_ascii(" $$namesep! "));
 246 }
 247
 248
 249 bool multipleAuthors(docstring const author)
 250 {
 251         return getAuthors(author).size() > 1;
 252 }
 253
 254
 255 // converts a string containing LaTeX commands into unicode
 256 // for display.
 257 docstring convertLaTeXCommands(docstring const & str)
 258 {
 259         docstring val = str;
 260         docstring ret;
 261
 262         bool scanning_cmd = false;
 263         bool scanning_math = false;
 264         bool escaped = false; // used to catch \$, etc.
 265         while (!val.empty()) {
 266                 char_type const ch = val[0];
 267
 268                 // if we're scanning math, we output everything until we
 269                 // find an unescaped $, at which point we break out.
 270                 if (scanning_math) {
 271                         if (escaped)
 272                                 escaped = false;
 273                         else if (ch == '\\')
 274                                 escaped = true;
 275                         else if (ch == '$')
 276                                 scanning_math = false;
 277                         ret += ch;
 278                         val = val.substr(1);
 279                         continue;
 280                 }
 281
 282                 // if we're scanning a command name, then we just
 283                 // discard characters until we hit something that
 284                 // isn't alpha.
 285                 if (scanning_cmd) {
 286                         if (isAlphaASCII(ch)) {
 287                                 val = val.substr(1);
 288                                 escaped = false;
 289                                 continue;
 290                         }
 291                         // so we're done with this command.
 292                         // now we fall through and check this character.
 293                         scanning_cmd = false;
 294                 }
 295
 296                 // was the last character a \? If so, then this is something like:
 297                 // \\ or \$, so we'll just output it. That's probably not always right...
 298                 if (escaped) {
 299                         // exception: output \, as THIN SPACE
 300                         if (ch == ',')
 301                                 ret.push_back(0x2009);
 302                         else
 303                                 ret += ch;
 304                         val = val.substr(1);
 305                         escaped = false;
 306                         continue;
 307                 }
 308
 309                 if (ch == '$') {
 310                         ret += ch;
 311                         val = val.substr(1);
 312                         scanning_math = true;
 313                         continue;
 314                 }
 315
 316                 // we just ignore braces
 317                 if (ch == '{' || ch == '}') {
 318                         val = val.substr(1);
 319                         continue;
 320                 }
 321
 322                 // we're going to check things that look like commands, so if
 323                 // this doesn't, just output it.
 324                 if (ch != '\\') {
 325                         ret += ch;
 326                         val = val.substr(1);
 327                         continue;
 328                 }
 329
 330                 // ok, could be a command of some sort
 331                 // let's see if it corresponds to some unicode
 332                 // unicodesymbols has things in the form: \"{u},
 333                 // whereas we may see things like: \"u. So we'll
 334                 // look for that and change it, if necessary.
 335                 // FIXME: This is a sort of mini-tex2lyx.
 336                 //        Use the real tex2lyx instead!
 337                 static lyx::regex const reg("^\\\\\\W\\w");
 338                 if (lyx::regex_search(to_utf8(val), reg)) {
 339                         val.insert(3, from_ascii("}"));
 340                         val.insert(2, from_ascii("{"));
 341                 }
 342                 bool termination;
 343                 docstring rem;
 344                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 345                                 Encodings::TEXT_CMD, termination, rem);
 346                 if (!cnvtd.empty()) {
 347                         // it did, so we'll take that bit and proceed with what's left
 348                         ret += cnvtd;
 349                         val = rem;
 350                         continue;
 351                 }
 352                 // it's a command of some sort
 353                 scanning_cmd = true;
 354                 escaped = true;
 355                 val = val.substr(1);
 356         }
 357         return ret;
 358 }
 359
 360
 361 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 362 docstring processRichtext(docstring const & str, bool richtext)
 363 {
 364         docstring val = str;
 365         docstring ret;
 366
 367         bool scanning_rich = false;
 368         while (!val.empty()) {
 369                 char_type const ch = val[0];
 370                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 371                         // beginning of rich text
 372                         scanning_rich = true;
 373                         val = val.substr(2);
 374                         continue;
 375                 }
 376                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 377                         // end of rich text
 378                         scanning_rich = false;
 379                         val = val.substr(2);
 380                         continue;
 381                 }
 382                 if (richtext) {
 383                         if (scanning_rich)
 384                                 ret += ch;
 385                         else {
 386                                 // we need to escape '<' and '>'
 387                                 if (ch == '<')
 388                                         ret += "&lt;";
 389                                 else if (ch == '>')
 390                                         ret += "&gt;";
 391                                 else
 392                                         ret += ch;
 393                         }
 394                 } else if (!scanning_rich /* && !richtext */)
 395                         ret += ch;
 396                 // else the character is discarded, which will happen only if
 397                 // richtext == false and we are scanning rich text
 398                 val = val.substr(1);
 399         }
 400         return ret;
 401 }
 402
 403 } // anon namespace
 404
 405
 406 //////////////////////////////////////////////////////////////////////
 407 //
 408 // BibTeXInfo
 409 //
 410 //////////////////////////////////////////////////////////////////////
 411
 412 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 413         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 414           modifier_(0)
 415 {}
 416
 417
 418
 419 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
 420                                           bool full, bool forceshort) const
 421 {
 422         docstring author = operator[]("author");
 423         if (author.empty())
 424                 author = operator[]("editor");
 425
 426         return getAuthorList(buf, author, full, forceshort);
 427 }
 428
 429
 430 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 431                 docstring const & author, bool const full, bool const forceshort,
 432                 bool const allnames, bool const beginning) const
 433 {
 434         // Maxnames treshold depend on engine
 435         size_t maxnames = buf ?
 436                 buf->params().documentClass().max_citenames() : 2;
 437
 438         if (!is_bibtex_) {
 439                 docstring const opt = label();
 440                 if (opt.empty())
 441                         return docstring();
 442
 443                 docstring authors;
 444                 docstring const remainder = trim(split(opt, authors, '('));
 445                 if (remainder.empty())
 446                         // in this case, we didn't find a "(",
 447                         // so we don't have author (year)
 448                         return docstring();
 449                 return authors;
 450         }
 451
 452         if (author.empty())
 453                 return author;
 454
 455         // OK, we've got some names. Let's format them.
 456         // Try to split the author list
 457         vector<docstring> const authors = getAuthors(author);
 458
 459         docstring retval;
 460
 461         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 462                                                : ENGINE_TYPE_DEFAULT;
 463
 464         // These are defined in the styles
 465         string const etal =
 466                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
 467                     : " et al.";
 468         string const namesep =
 469                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
 470                    : ", ";
 471         string const lastnamesep =
 472                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
 473                     : ", and ";
 474         string const pairnamesep =
 475                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
 476                      : " and ";
 477         string firstnameform =
 478                         buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
 479                              : "%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 480         if (!beginning)
 481                 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
 482                                              : "%prename% %surname%{%suffix%[[, %suffix%]]}";
 483         string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
 484                              : "%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 485         if (!beginning)
 486                 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
 487                                              : "%prename% %surname%{%suffix%[[, %suffix%]]}";
 488
 489         // Shorten the list (with et al.) if forceshort is set
 490         // and the list can actually be shortened, else if maxcitenames
 491         // is passed and full is not set.
 492         bool shorten = forceshort && authors.size() > 1;
 493         vector<docstring>::const_iterator it = authors.begin();
 494         vector<docstring>::const_iterator en = authors.end();
 495         for (size_t i = 0; it != en; ++it, ++i) {
 496                 if (i >= maxnames && !full) {
 497                         shorten = true;
 498                         break;
 499                 }
 500                 if (*it == "others") {
 501                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 502                         break;
 503                 }
 504                 if (i > 0 && i == authors.size() - 1) {
 505                         if (authors.size() == 2)
 506                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 507                         else
 508                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 509                 } else if (i > 0)
 510                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 511                 if (allnames)
 512                         retval += (i == 0) ? constructName(*it, firstnameform)
 513                                 : constructName(*it, othernameform);
 514                 else
 515                         retval += nameParts(*it).surname;
 516         }
 517         if (shorten) {
 518                 if (allnames)
 519                         retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 520                 else
 521                         retval = nameParts(authors[0]).surname + (buf ? buf->B_(etal) : from_ascii(etal));
 522         }
 523
 524         return convertLaTeXCommands(retval);
 525 }
 526
 527
 528 docstring const BibTeXInfo::getYear() const
 529 {
 530         if (is_bibtex_) {
 531                 // first try legacy year field
 532                 docstring year = operator[]("year");
 533                 if (!year.empty())
 534                         return year;
 535                 // now try biblatex's date field
 536                 year = operator[]("date");
 537                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 538                 // We only want the years.
 539                 static regex const yreg("[-]?([\\d]{4}).*");
 540                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 541                 smatch sm;
 542                 string const date = to_utf8(year);
 543                 if (!regex_match(date, sm, yreg))
 544                         // cannot parse year.
 545                         return docstring();
 546                 year = from_ascii(sm[1]);
 547                 // check for an endyear
 548                 if (regex_match(date, sm, ereg))
 549                         year += char_type(0x2013) + from_ascii(sm[1]);
 550                 return year;
 551         }
 552
 553         docstring const opt = label();
 554         if (opt.empty())
 555                 return docstring();
 556
 557         docstring authors;
 558         docstring tmp = split(opt, authors, '(');
 559         if (tmp.empty())
 560                 // we don't have author (year)
 561                 return docstring();
 562         docstring year;
 563         tmp = split(tmp, year, ')');
 564         return year;
 565 }
 566
 567
 568 namespace {
 569
 570 docstring parseOptions(docstring const & format, string & optkey,
 571                     docstring & ifpart, docstring & elsepart);
 572
 573 // Calls parseOptions to deal with an embedded option, such as:
 574 //   {%number%[[, no.~%number%]]}
 575 // which must appear at the start of format. ifelsepart gets the
 576 // whole of the option, and we return what's left after the option.
 577 // we return format if there is an error.
 578 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 579 {
 580         LASSERT(format[0] == '{' && format[1] == '%', return format);
 581         string optkey;
 582         docstring ifpart;
 583         docstring elsepart;
 584         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 585         if (format == rest) { // parse error
 586                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 587                 return format;
 588         }
 589         LASSERT(rest.size() <= format.size(),
 590                 { ifelsepart = docstring(); return format; });
 591         ifelsepart = format.substr(0, format.size() - rest.size());
 592         return rest;
 593 }
 594
 595
 596 // Gets a "clause" from a format string, where the clause is
 597 // delimited by '[[' and ']]'. Returns what is left after the
 598 // clause is removed, and returns format if there is an error.
 599 docstring getClause(docstring const & format, docstring & clause)
 600 {
 601         docstring fmt = format;
 602         // remove '[['
 603         fmt = fmt.substr(2);
 604         // we'll remove characters from the front of fmt as we
 605         // deal with them
 606         while (!fmt.empty()) {
 607                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 608                         // that's the end
 609                         fmt = fmt.substr(2);
 610                         break;
 611                 }
 612                 // check for an embedded option
 613                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 614                         docstring part;
 615                         docstring const rest = parseEmbeddedOption(fmt, part);
 616                         if (fmt == rest) {
 617                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 618                                 return format;
 619                         }
 620                         clause += part;
 621                         fmt = rest;
 622                 } else { // it's just a normal character
 623                                 clause += fmt[0];
 624                                 fmt = fmt.substr(1);
 625                 }
 626         }
 627         return fmt;
 628 }
 629
 630
 631 // parse an options string, which must appear at the start of the
 632 // format parameter. puts the parsed bits in optkey, ifpart, and
 633 // elsepart and returns what's left after the option is removed.
 634 // if there's an error, it returns format itself.
 635 docstring parseOptions(docstring const & format, string & optkey,
 636                     docstring & ifpart, docstring & elsepart)
 637 {
 638         LASSERT(format[0] == '{' && format[1] == '%', return format);
 639         // strip '{%'
 640         docstring fmt = format.substr(2);
 641         size_t pos = fmt.find('%'); // end of key
 642         if (pos == string::npos) {
 643                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 644                 return format;
 645         }
 646         optkey = to_utf8(fmt.substr(0, pos));
 647         fmt = fmt.substr(pos + 1);
 648         // [[format]] should be next
 649         if (fmt[0] != '[' || fmt[1] != '[') {
 650                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 651                 return format;
 652         }
 653
 654         docstring curfmt = fmt;
 655         fmt = getClause(curfmt, ifpart);
 656         if (fmt == curfmt) {
 657                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 658                 return format;
 659         }
 660
 661         if (fmt[0] == '}') // we're done, no else clause
 662                 return fmt.substr(1);
 663
 664         // else part should follow
 665         if (fmt[0] != '[' || fmt[1] != '[') {
 666                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 667                 return format;
 668         }
 669
 670         curfmt = fmt;
 671         fmt = getClause(curfmt, elsepart);
 672         // we should be done
 673         if (fmt == curfmt || fmt[0] != '}') {
 674                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 675                 return format;
 676         }
 677         return fmt.substr(1);
 678 }
 679
 680
 681 } // anon namespace
 682
 683 /* FIXME
 684 Bug #9131 revealed an oddity in how we are generating citation information
 685 when more than one key is given. We end up building a longer and longer format
 686 string as we go, which we then have to re-parse, over and over and over again,
 687 rather than generating the information for the individual keys and then putting
 688 all of that together. We do that to deal with the way separators work, from what
 689 I can tell, but it still feels like a hack. Fixing this would require quite a
 690 bit of work, however.
 691 */
 692 docstring BibTeXInfo::expandFormat(docstring const & format,
 693                 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
 694                 CiteItem const & ci, bool next, bool second) const
 695 {
 696         // incorrect use of macros could put us in an infinite loop
 697         static int const max_passes = 5000;
 698         // the use of overly large keys can lead to performance problems, due
 699         // to eventual attempts to convert LaTeX macros to unicode. See bug
 700         // #8944. By default, the size is limited to 128 (in CiteItem), but
 701         // for specific purposes (such as XHTML export), it needs to be enlarged
 702         // This is perhaps not the best solution, but it will have to do for now.
 703         size_t const max_keysize = ci.max_key_size;
 704         odocstringstream ret; // return value
 705         string key;
 706         bool scanning_key = false;
 707         bool scanning_rich = false;
 708
 709         CiteEngineType const engine_type = buf.params().citeEngineType();
 710         docstring fmt = format;
 711         // we'll remove characters from the front of fmt as we
 712         // deal with them
 713         while (!fmt.empty()) {
 714                 if (counter > max_passes) {
 715                         LYXERR0("Recursion limit reached while parsing `"
 716                                 << format << "'.");
 717                         return _("ERROR!");
 718                 }
 719
 720                 char_type thischar = fmt[0];
 721                 if (thischar == '%') {
 722                         // beginning or end of key
 723                         if (scanning_key) {
 724                                 // end of key
 725                                 scanning_key = false;
 726                                 // so we replace the key with its value, which may be empty
 727                                 if (key[0] == '!') {
 728                                         // macro
 729                                         string const val =
 730                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 731                                         fmt = from_utf8(val) + fmt.substr(1);
 732                                         counter += 1;
 733                                         continue;
 734                                 } else if (key[0] == '_') {
 735                                         // a translatable bit
 736                                         string const val =
 737                                                 buf.params().documentClass().getCiteMacro(engine_type, key);
 738                                         docstring const trans =
 739                                                 translateIfPossible(from_utf8(val), buf.params().language->code());
 740                                         ret << trans;
 741                                 } else {
 742                                         docstring const val =
 743                                                 getValueForKey(key, buf, ci, xrefs, max_keysize);
 744                                         if (!scanning_rich)
 745                                                 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
 746                                         ret << val;
 747                                         if (!scanning_rich)
 748                                                 ret << from_ascii("{!</span>!}");
 749                                 }
 750                         } else {
 751                                 // beginning of key
 752                                 key.clear();
 753                                 scanning_key = true;
 754                         }
 755                 }
 756                 else if (thischar == '{') {
 757                         // beginning of option?
 758                         if (scanning_key) {
 759                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
 760                                 return _("ERROR!");
 761                         }
 762                         if (fmt.size() > 1) {
 763                                 if (fmt[1] == '%') {
 764                                         // it is the beginning of an optional format
 765                                         string optkey;
 766                                         docstring ifpart;
 767                                         docstring elsepart;
 768                                         docstring const newfmt =
 769                                                 parseOptions(fmt, optkey, ifpart, elsepart);
 770                                         if (newfmt == fmt) // parse error
 771                                                 return _("ERROR!");
 772                                         fmt = newfmt;
 773                                         docstring const val =
 774                                                 getValueForKey(optkey, buf, ci, xrefs);
 775                                         if (optkey == "next" && next)
 776                                                 ret << ifpart; // without expansion
 777                                         else if (optkey == "second" && second) {
 778                                                 int newcounter = 0;
 779                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 780                                                         ci, next);
 781                                         } else if (!val.empty()) {
 782                                                 int newcounter = 0;
 783                                                 ret << expandFormat(ifpart, xrefs, newcounter, buf,
 784                                                         ci, next);
 785                                         } else if (!elsepart.empty()) {
 786                                                 int newcounter = 0;
 787                                                 ret << expandFormat(elsepart, xrefs, newcounter, buf,
 788                                                         ci, next);
 789                                         }
 790                                         // fmt will have been shortened for us already
 791                                         continue;
 792                                 }
 793                                 if (fmt[1] == '!') {
 794                                         // beginning of rich text
 795                                         scanning_rich = true;
 796                                         fmt = fmt.substr(2);
 797                                         ret << from_ascii("{!");
 798                                         continue;
 799                                 }
 800                         }
 801                         // we are here if '{' was not followed by % or !.
 802                         // So it's just a character.
 803                         ret << thischar;
 804                 }
 805                 else if (scanning_rich && thischar == '!'
 806                          && fmt.size() > 1 && fmt[1] == '}') {
 807                         // end of rich text
 808                         scanning_rich = false;
 809                         fmt = fmt.substr(2);
 810                         ret << from_ascii("!}");
 811                         continue;
 812                 }
 813                 else if (scanning_key)
 814                         key += char(thischar);
 815                 else {
 816                         try {
 817                                 ret.put(thischar);
 818                         } catch (EncodingException & /* e */) {
 819                                 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
 820                         }
 821                 }
 822                 fmt = fmt.substr(1);
 823         } // for loop
 824         if (scanning_key) {
 825                 LYXERR0("Never found end of key in `" << format << "'!");
 826                 return _("ERROR!");
 827         }
 828         if (scanning_rich) {
 829                 LYXERR0("Never found end of rich text in `" << format << "'!");
 830                 return _("ERROR!");
 831         }
 832         return ret.str();
 833 }
 834
 835
 836 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 837         Buffer const & buf, CiteItem const & ci) const
 838 {
 839         bool const richtext = ci.richtext;
 840
 841         if (!richtext && !info_.empty())
 842                 return info_;
 843         if (richtext && !info_richtext_.empty())
 844                 return info_richtext_;
 845
 846         if (!is_bibtex_) {
 847                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 848                 info_ = it->second;
 849                 return info_;
 850         }
 851
 852         CiteEngineType const engine_type = buf.params().citeEngineType();
 853         DocumentClass const & dc = buf.params().documentClass();
 854         docstring const & format =
 855                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 856         int counter = 0;
 857         info_ = expandFormat(format, xrefs, counter, buf,
 858                 ci, false, false);
 859
 860         if (info_.empty()) {
 861                 // this probably shouldn't happen
 862                 return info_;
 863         }
 864
 865         if (richtext) {
 866                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 867                 return info_richtext_;
 868         }
 869
 870         info_ = convertLaTeXCommands(processRichtext(info_, false));
 871         return info_;
 872 }
 873
 874
 875 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 876         Buffer const & buf, docstring const & format,
 877         CiteItem const & ci, bool next, bool second) const
 878 {
 879         docstring loclabel;
 880
 881         int counter = 0;
 882         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 883
 884         if (!loclabel.empty() && !next) {
 885                 loclabel = processRichtext(loclabel, ci.richtext);
 886                 loclabel = convertLaTeXCommands(loclabel);
 887         }
 888
 889         return loclabel;
 890 }
 891
 892
 893 docstring const & BibTeXInfo::operator[](docstring const & field) const
 894 {
 895         BibTeXInfo::const_iterator it = find(field);
 896         if (it != end())
 897                 return it->second;
 898         static docstring const empty_value = docstring();
 899         return empty_value;
 900 }
 901
 902
 903 docstring const & BibTeXInfo::operator[](string const & field) const
 904 {
 905         return operator[](from_ascii(field));
 906 }
 907
 908
 909 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 910         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 911 {
 912         // anything less is pointless
 913         LASSERT(maxsize >= 16, maxsize = 16);
 914         string key = oldkey;
 915         bool cleanit = false;
 916         if (prefixIs(oldkey, "clean:")) {
 917                 key = oldkey.substr(6);
 918                 cleanit = true;
 919         }
 920
 921         docstring ret = operator[](key);
 922         if (ret.empty() && !xrefs.empty()) {
 923                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
 924                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
 925                 for (; it != en; ++it) {
 926                         if (*it && !(**it)[key].empty()) {
 927                                 ret = (**it)[key];
 928                                 break;
 929                         }
 930                 }
 931         }
 932         if (ret.empty()) {
 933                 // some special keys
 934                 // FIXME: dialog, textbefore and textafter have nothing to do with this
 935                 if (key == "dialog" && ci.context == CiteItem::Dialog)
 936                         ret = from_ascii("x"); // any non-empty string will do
 937                 else if (key == "export" && ci.context == CiteItem::Export)
 938                         ret = from_ascii("x"); // any non-empty string will do
 939                 else if (key == "ifstar" && ci.Starred)
 940                         ret = from_ascii("x"); // any non-empty string will do
 941                 else if (key == "ifqualified" && ci.isQualified)
 942                         ret = from_ascii("x"); // any non-empty string will do
 943                 else if (key == "entrytype")
 944                         ret = entry_type_;
 945                 else if (prefixIs(key, "ifentrytype:")
 946                          && from_ascii(key.substr(12)) == entry_type_)
 947                         ret = from_ascii("x"); // any non-empty string will do
 948                 else if (key == "key")
 949                         ret = bib_key_;
 950                 else if (key == "label")
 951                         ret = label_;
 952                 else if (key == "modifier" && modifier_ != 0)
 953                         ret = modifier_;
 954                 else if (key == "numericallabel")
 955                         ret = cite_number_;
 956                 else if (prefixIs(key, "ifmultiple:")) {
 957                         // Return whether we have multiple authors
 958                         docstring const kind = operator[](from_ascii(key.substr(11)));
 959                         if (multipleAuthors(kind))
 960                                 ret = from_ascii("x"); // any non-empty string will do
 961                 }
 962                 else if (prefixIs(key, "abbrvnames:")) {
 963                         // Special key to provide abbreviated name list,
 964                         // with respect to maxcitenames. Suitable for Bibliography
 965                         // beginnings.
 966                         docstring const kind = operator[](from_ascii(key.substr(11)));
 967                         ret = getAuthorList(&buf, kind, false, false, true);
 968                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 969                                 ret[0] = uppercase(ret[0]);
 970                 } else if (prefixIs(key, "fullnames:")) {
 971                         // Return a full name list. Suitable for Bibliography
 972                         // beginnings.
 973                         docstring const kind = operator[](from_ascii(key.substr(10)));
 974                         ret = getAuthorList(&buf, kind, true, false, true);
 975                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 976                                 ret[0] = uppercase(ret[0]);
 977                 } else if (prefixIs(key, "forceabbrvnames:")) {
 978                         // Special key to provide abbreviated name lists,
 979                         // irrespective of maxcitenames. Suitable for Bibliography
 980                         // beginnings.
 981                         docstring const kind = operator[](from_ascii(key.substr(15)));
 982                         ret = getAuthorList(&buf, kind, false, true, true);
 983                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 984                                 ret[0] = uppercase(ret[0]);
 985                 } else if (prefixIs(key, "abbrvbynames:")) {
 986                         // Special key to provide abbreviated name list,
 987                         // with respect to maxcitenames. Suitable for further names inside a
 988                         // bibliography item // (such as "ed. by ...")
 989                         docstring const kind = operator[](from_ascii(key.substr(11)));
 990                         ret = getAuthorList(&buf, kind, false, false, true, false);
 991                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 992                                 ret[0] = uppercase(ret[0]);
 993                 } else if (prefixIs(key, "fullbynames:")) {
 994                         // Return a full name list. Suitable for further names inside a
 995                         // bibliography item // (such as "ed. by ...")
 996                         docstring const kind = operator[](from_ascii(key.substr(10)));
 997                         ret = getAuthorList(&buf, kind, true, false, true, false);
 998                         if (ci.forceUpperCase && isLowerCase(ret[0]))
 999                                 ret[0] = uppercase(ret[0]);
1000                 } else if (prefixIs(key, "forceabbrvbynames:")) {
1001                         // Special key to provide abbreviated name lists,
1002                         // irrespective of maxcitenames. Suitable for further names inside a
1003                         // bibliography item // (such as "ed. by ...")
1004                         docstring const kind = operator[](from_ascii(key.substr(15)));
1005                         ret = getAuthorList(&buf, kind, false, true, true, false);
1006                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1007                                 ret[0] = uppercase(ret[0]);
1008                 } else if (key == "abbrvciteauthor") {
1009                         // Special key to provide abbreviated author or
1010                         // editor names (suitable for citation labels),
1011                         // with respect to maxcitenames.
1012                         ret = getAuthorOrEditorList(&buf, false, false);
1013                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1014                                 ret[0] = uppercase(ret[0]);
1015                 } else if (key == "fullciteauthor") {
1016                         // Return a full author or editor list (for citation labels)
1017                         ret = getAuthorOrEditorList(&buf, true, false);
1018                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1019                                 ret[0] = uppercase(ret[0]);
1020                 } else if (key == "forceabbrvciteauthor") {
1021                         // Special key to provide abbreviated author or
1022                         // editor names (suitable for citation labels),
1023                         // irrespective of maxcitenames.
1024                         ret = getAuthorOrEditorList(&buf, false, true);
1025                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1026                                 ret[0] = uppercase(ret[0]);
1027                 } else if (key == "bibentry") {
1028                         // Special key to provide the full bibliography entry: see getInfo()
1029                         CiteEngineType const engine_type = buf.params().citeEngineType();
1030                         DocumentClass const & dc = buf.params().documentClass();
1031                         docstring const & format =
1032                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1033                         int counter = 0;
1034                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1035                 } else if (key == "textbefore")
1036                         ret = ci.textBefore;
1037                 else if (key == "textafter")
1038                         ret = ci.textAfter;
1039                 else if (key == "curpretext")
1040                         ret = ci.getPretexts()[bib_key_];
1041                 else if (key == "curposttext")
1042                         ret = ci.getPosttexts()[bib_key_];
1043                 else if (key == "year")
1044                         ret = getYear();
1045         }
1046
1047         if (cleanit)
1048                 ret = html::cleanAttr(ret);
1049
1050         // make sure it is not too big
1051         support::truncateWithEllipsis(ret, maxsize);
1052         return ret;
1053 }
1054
1055
1056 //////////////////////////////////////////////////////////////////////
1057 //
1058 // BiblioInfo
1059 //
1060 //////////////////////////////////////////////////////////////////////
1061
1062 namespace {
1063
1064 // A functor for use with sort, leading to case insensitive sorting
1065 class compareNoCase: public binary_function<docstring, docstring, bool>
1066 {
1067 public:
1068         bool operator()(docstring const & s1, docstring const & s2) const {
1069                 return compare_no_case(s1, s2) < 0;
1070         }
1071 };
1072
1073 } // namespace anon
1074
1075
1076 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1077 {
1078         vector<docstring> result;
1079         if (!data.isBibTeX())
1080                 return result;
1081         // Legacy crossref field. This is not nestable.
1082         if (!nested && !data["crossref"].empty()) {
1083                 docstring const xrefkey = data["crossref"];
1084                 result.push_back(xrefkey);
1085                 // However, check for nested xdatas
1086                 BiblioInfo::const_iterator it = find(xrefkey);
1087                 if (it != end()) {
1088                         BibTeXInfo const & xref = it->second;
1089                         vector<docstring> const nxdata = getXRefs(xref, true);
1090                         if (!nxdata.empty())
1091                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
1092                 }
1093         }
1094         // Biblatex's xdata field. Infinitely nestable.
1095         // XData field can consist of a comma-separated list of keys
1096         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1097         if (!xdatakeys.empty()) {
1098                 vector<docstring>::const_iterator xit = xdatakeys.begin();
1099                 vector<docstring>::const_iterator xen = xdatakeys.end();
1100                 for (; xit != xen; ++xit) {
1101                         docstring const xdatakey = *xit;
1102                         result.push_back(xdatakey);
1103                         BiblioInfo::const_iterator it = find(xdatakey);
1104                         if (it != end()) {
1105                                 BibTeXInfo const & xdata = it->second;
1106                                 vector<docstring> const nxdata = getXRefs(xdata, true);
1107                                 if (!nxdata.empty())
1108                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
1109                         }
1110                 }
1111         }
1112         return result;
1113 }
1114
1115
1116 vector<docstring> const BiblioInfo::getKeys() const
1117 {
1118         vector<docstring> bibkeys;
1119         BiblioInfo::const_iterator it  = begin();
1120         for (; it != end(); ++it)
1121                 bibkeys.push_back(it->first);
1122         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1123         return bibkeys;
1124 }
1125
1126
1127 vector<docstring> const BiblioInfo::getFields() const
1128 {
1129         vector<docstring> bibfields;
1130         set<docstring>::const_iterator it = field_names_.begin();
1131         set<docstring>::const_iterator end = field_names_.end();
1132         for (; it != end; ++it)
1133                 bibfields.push_back(*it);
1134         sort(bibfields.begin(), bibfields.end());
1135         return bibfields;
1136 }
1137
1138
1139 vector<docstring> const BiblioInfo::getEntries() const
1140 {
1141         vector<docstring> bibentries;
1142         set<docstring>::const_iterator it = entry_types_.begin();
1143         set<docstring>::const_iterator end = entry_types_.end();
1144         for (; it != end; ++it)
1145                 bibentries.push_back(*it);
1146         sort(bibentries.begin(), bibentries.end());
1147         return bibentries;
1148 }
1149
1150
1151 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1152 {
1153         BiblioInfo::const_iterator it = find(key);
1154         if (it == end())
1155                 return docstring();
1156         BibTeXInfo const & data = it->second;
1157         return data.getAuthorOrEditorList(&buf, false);
1158 }
1159
1160
1161 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1162 {
1163         BiblioInfo::const_iterator it = find(key);
1164         if (it == end())
1165                 return docstring();
1166         BibTeXInfo const & data = it->second;
1167         return data.citeNumber();
1168 }
1169
1170
1171 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1172 {
1173         BiblioInfo::const_iterator it = find(key);
1174         if (it == end())
1175                 return docstring();
1176         BibTeXInfo const & data = it->second;
1177         docstring year = data.getYear();
1178         if (year.empty()) {
1179                 // let's try the crossrefs
1180                 vector<docstring> const xrefs = getXRefs(data);
1181                 if (xrefs.empty())
1182                         // no luck
1183                         return docstring();
1184                 vector<docstring>::const_iterator it = xrefs.begin();
1185                 vector<docstring>::const_iterator en = xrefs.end();
1186                 for (; it != en; ++it) {
1187                         BiblioInfo::const_iterator const xrefit = find(*it);
1188                         if (xrefit == end())
1189                                 continue;
1190                         BibTeXInfo const & xref_data = xrefit->second;
1191                         year = xref_data.getYear();
1192                         if (!year.empty())
1193                                 // success!
1194                                 break;
1195                 }
1196         }
1197         if (use_modifier && data.modifier() != 0)
1198                 year += data.modifier();
1199         return year;
1200 }
1201
1202
1203 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1204 {
1205         docstring const year = getYear(key, use_modifier);
1206         if (year.empty())
1207                 return buf.B_("No year");
1208         return year;
1209 }
1210
1211
1212 docstring const BiblioInfo::getInfo(docstring const & key,
1213         Buffer const & buf, CiteItem const & ci) const
1214 {
1215         BiblioInfo::const_iterator it = find(key);
1216         if (it == end())
1217                 return docstring(_("Bibliography entry not found!"));
1218         BibTeXInfo const & data = it->second;
1219         BibTeXInfoList xrefptrs;
1220         vector<docstring> const xrefs = getXRefs(data);
1221         if (!xrefs.empty()) {
1222                 vector<docstring>::const_iterator it = xrefs.begin();
1223                 vector<docstring>::const_iterator en = xrefs.end();
1224                 for (; it != en; ++it) {
1225                         BiblioInfo::const_iterator const xrefit = find(*it);
1226                         if (xrefit != end())
1227                                 xrefptrs.push_back(&(xrefit->second));
1228                 }
1229         }
1230         return data.getInfo(xrefptrs, buf, ci);
1231 }
1232
1233
1234 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1235         Buffer const & buf, string const & style, CiteItem const & ci) const
1236 {
1237         size_t max_size = ci.max_size;
1238         // shorter makes no sense
1239         LASSERT(max_size >= 16, max_size = 16);
1240
1241         // we can't display more than 10 of these, anyway
1242         bool const too_many_keys = keys.size() > 10;
1243         if (too_many_keys)
1244                 keys.resize(10);
1245
1246         CiteEngineType const engine_type = buf.params().citeEngineType();
1247         DocumentClass const & dc = buf.params().documentClass();
1248         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1249         docstring ret = format;
1250         vector<docstring>::const_iterator key = keys.begin();
1251         vector<docstring>::const_iterator ken = keys.end();
1252         for (int i = 0; key != ken; ++key, ++i) {
1253                 BiblioInfo::const_iterator it = find(*key);
1254                 BibTeXInfo empty_data;
1255                 empty_data.key(*key);
1256                 BibTeXInfo & data = empty_data;
1257                 vector<BibTeXInfo const *> xrefptrs;
1258                 if (it != end()) {
1259                         data = it->second;
1260                         vector<docstring> const xrefs = getXRefs(data);
1261                         if (!xrefs.empty()) {
1262                                 vector<docstring>::const_iterator it = xrefs.begin();
1263                                 vector<docstring>::const_iterator en = xrefs.end();
1264                                 for (; it != en; ++it) {
1265                                         BiblioInfo::const_iterator const xrefit = find(*it);
1266                                         if (xrefit != end())
1267                                                 xrefptrs.push_back(&(xrefit->second));
1268                                 }
1269                         }
1270                 }
1271                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1272         }
1273
1274         if (too_many_keys)
1275                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1276         support::truncateWithEllipsis(ret, max_size);
1277         return ret;
1278 }
1279
1280
1281 bool BiblioInfo::isBibtex(docstring const & key) const
1282 {
1283         docstring key1;
1284         split(key, key1, ',');
1285         BiblioInfo::const_iterator it = find(key1);
1286         if (it == end())
1287                 return false;
1288         return it->second.isBibTeX();
1289 }
1290
1291
1292 vector<docstring> const BiblioInfo::getCiteStrings(
1293         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1294         Buffer const & buf, CiteItem const & ci) const
1295 {
1296         if (empty())
1297                 return vector<docstring>();
1298
1299         string style;
1300         vector<docstring> vec(styles.size());
1301         for (size_t i = 0; i != vec.size(); ++i) {
1302                 style = styles[i].name;
1303                 vec[i] = getLabel(keys, buf, style, ci);
1304         }
1305
1306         return vec;
1307 }
1308
1309
1310 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1311 {
1312         bimap_.insert(info.begin(), info.end());
1313         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1314         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1315 }
1316
1317
1318 namespace {
1319
1320 // used in xhtml to sort a list of BibTeXInfo objects
1321 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1322 {
1323         docstring const lauth = lhs->getAuthorOrEditorList();
1324         docstring const rauth = rhs->getAuthorOrEditorList();
1325         docstring const lyear = lhs->getYear();
1326         docstring const ryear = rhs->getYear();
1327         docstring const ltitl = lhs->operator[]("title");
1328         docstring const rtitl = rhs->operator[]("title");
1329         return  (lauth < rauth)
1330                 || (lauth == rauth && lyear < ryear)
1331                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1332 }
1333
1334 }
1335
1336
1337 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1338 {
1339         cited_entries_.clear();
1340         // We are going to collect all the citation keys used in the document,
1341         // getting them from the TOC.
1342         // FIXME We may want to collect these differently, in the first case,
1343         // so that we might have them in order of appearance.
1344         set<docstring> citekeys;
1345         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1346         Toc::const_iterator it = toc->begin();
1347         Toc::const_iterator const en = toc->end();
1348         for (; it != en; ++it) {
1349                 if (it->str().empty())
1350                         continue;
1351                 vector<docstring> const keys = getVectorFromString(it->str());
1352                 citekeys.insert(keys.begin(), keys.end());
1353         }
1354         if (citekeys.empty())
1355                 return;
1356
1357         // We have a set of the keys used in this document.
1358         // We will now convert it to a list of the BibTeXInfo objects used in
1359         // this document...
1360         vector<BibTeXInfo const *> bi;
1361         set<docstring>::const_iterator cit = citekeys.begin();
1362         set<docstring>::const_iterator const cen = citekeys.end();
1363         for (; cit != cen; ++cit) {
1364                 BiblioInfo::const_iterator const bt = find(*cit);
1365                 if (bt == end() || !bt->second.isBibTeX())
1366                         continue;
1367                 bi.push_back(&(bt->second));
1368         }
1369         // ...and sort it.
1370         sort(bi.begin(), bi.end(), lSorter);
1371
1372         // Now we can write the sorted keys
1373         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1374         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1375         for (; bit != ben; ++bit)
1376                 cited_entries_.push_back((*bit)->key());
1377 }
1378
1379
1380 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1381 {
1382         collectCitedEntries(buf);
1383         CiteEngineType const engine_type = buf.params().citeEngineType();
1384         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1385
1386         int keynumber = 0;
1387         char modifier = 0;
1388         // used to remember the last one we saw
1389         // we'll be comparing entries to see if we need to add
1390         // modifiers, like "1984a"
1391         map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1392
1393         vector<docstring>::const_iterator it = cited_entries_.begin();
1394         vector<docstring>::const_iterator const en = cited_entries_.end();
1395         for (; it != en; ++it) {
1396                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1397                 // this shouldn't happen, but...
1398                 if (biit == bimap_.end())
1399                         // ...fail gracefully, anyway.
1400                         continue;
1401                 BibTeXInfo & entry = biit->second;
1402                 if (numbers) {
1403                         docstring const num = convert<docstring>(++keynumber);
1404                         entry.setCiteNumber(num);
1405                 } else {
1406                         // The first test here is checking whether this is the first
1407                         // time through the loop. If so, then we do not have anything
1408                         // with which to compare.
1409                         if (last != bimap_.end()
1410                             && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1411                             // we access the year via getYear() so as to get it from the xref,
1412                             // if we need to do so
1413                             && getYear(entry.key()) == getYear(last->second.key())) {
1414                                 if (modifier == 0) {
1415                                         // so the last one should have been 'a'
1416                                         last->second.setModifier('a');
1417                                         modifier = 'b';
1418                                 } else if (modifier == 'z')
1419                                         modifier = 'A';
1420                                 else
1421                                         modifier++;
1422                         } else {
1423                                 modifier = 0;
1424                         }
1425                         entry.setModifier(modifier);
1426                         // remember the last one
1427                         last = biit;
1428                 }
1429         }
1430         // Set the labels
1431         it = cited_entries_.begin();
1432         for (; it != en; ++it) {
1433                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1434                 // this shouldn't happen, but...
1435                 if (biit == bimap_.end())
1436                         // ...fail gracefully, anyway.
1437                         continue;
1438                 BibTeXInfo & entry = biit->second;
1439                 if (numbers) {
1440                         entry.label(entry.citeNumber());
1441                 } else {
1442                         docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1443                         // we do it this way so as to access the xref, if necessary
1444                         // note that this also gives us the modifier
1445                         docstring const year = getYear(*it, buf, true);
1446                         if (!auth.empty() && !year.empty())
1447                                 entry.label(auth + ' ' + year);
1448                         else
1449                                 entry.label(entry.key());
1450                 }
1451         }
1452 }
1453
1454
1455 //////////////////////////////////////////////////////////////////////
1456 //
1457 // CitationStyle
1458 //
1459 //////////////////////////////////////////////////////////////////////
1460
1461
1462 CitationStyle citationStyleFromString(string const & command,
1463                                       BufferParams const & params)
1464 {
1465         CitationStyle cs;
1466         if (command.empty())
1467                 return cs;
1468
1469         string const alias = params.getCiteAlias(command);
1470         string cmd = alias.empty() ? command : alias;
1471         if (isUpperCase(command[0])) {
1472                 cs.forceUpperCase = true;
1473                 cmd[0] = lowercase(cmd[0]);
1474         }
1475
1476         size_t const n = command.size() - 1;
1477         if (command[n] == '*') {
1478                 cs.hasStarredVersion = true;
1479                 if (suffixIs(cmd, '*'))
1480                         cmd = cmd.substr(0, cmd.size() - 1);
1481         }
1482
1483         cs.name = cmd;
1484         return cs;
1485 }
1486
1487
1488 string citationStyleToString(const CitationStyle & cs, bool const latex)
1489 {
1490         string cmd = latex ? cs.cmd : cs.name;
1491         if (cs.forceUpperCase)
1492                 cmd[0] = uppercase(cmd[0]);
1493         if (cs.hasStarredVersion)
1494                 cmd += '*';
1495         return cmd;
1496 }
1497
1498 } // namespace lyx