src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  * \author Jürgen Spitzmüller
  11  *
  12  * Full author contact details are available in file CREDITS.
  13  */
  14
  15 #include <config.h>
  16
  17 #include "BiblioInfo.h"
  18 #include "Buffer.h"
  19 #include "BufferParams.h"
  20 #include "buffer_funcs.h"
  21 #include "Citation.h"
  22 #include "Encoding.h"
  23 #include "InsetIterator.h"
  24 #include "Language.h"
  25 #include "output_xhtml.h"
  26 #include "Paragraph.h"
  27 #include "TextClass.h"
  28 #include "TocBackend.h"
  29
  30 #include "support/convert.h"
  31 #include "support/debug.h"
  32 #include "support/docstream.h"
  33 #include "support/gettext.h"
  34 #include "support/lassert.h"
  35 #include "support/lstrings.h"
  36 #include "support/regex.h"
  37 #include "support/textutils.h"
  38
  39 #include <map>
  40 #include <set>
  41
  42 using namespace std;
  43 using namespace lyx::support;
  44
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 // Remove placeholders from names
  51 docstring renormalize(docstring const & input)
  52 {
  53         docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
  54         return subst(res, from_ascii("$$comma!"), from_ascii(","));
  55 }
  56
  57
  58 // Split the surname into prefix ("von-part") and family name
  59 pair<docstring, docstring> parseSurname(docstring const & sname)
  60 {
  61         // Split the surname into its tokens
  62         vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
  63         if (pieces.size() < 2)
  64                 return make_pair(docstring(), sname);
  65
  66         // Now we look for pieces that begin with a lower case letter.
  67         // All except for the very last token constitute the "von-part".
  68         docstring prefix;
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator const en = pieces.end();
  71         bool first = true;
  72         for (; it != en; ++it) {
  73                 if ((*it).empty())
  74                         continue;
  75                 // If this is the last piece, then what we now have is
  76                 // the family name, notwithstanding the casing.
  77                 if (it + 1 == en)
  78                         break;
  79                 char_type const c = (*it)[0];
  80                 // If the piece starts with a upper case char, we assume
  81                 // this is part of the surname.
  82                 if (!isLower(c))
  83                         break;
  84                 // Nothing of the former, so add this piece to the prename
  85                 if (!first)
  86                         prefix += " ";
  87                 else
  88                         first = false;
  89                 prefix += *it;
  90         }
  91
  92         // Reconstruct the family name.
  93         // Note that if we left the loop with because it + 1 == en,
  94         // then this will still do the right thing, i.e., make surname
  95         // just be the last piece.
  96         docstring surname;
  97         first = true;
  98         for (; it != en; ++it) {
  99                 if (!first)
 100                         surname += " ";
 101                 else
 102                         first = false;
 103                 surname += *it;
 104         }
 105         return make_pair(prefix, surname);
 106 }
 107
 108
 109 struct name_parts {
 110         docstring surname;
 111         docstring prename;
 112         docstring suffix;
 113         docstring prefix;
 114 };
 115
 116
 117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
 118 name_parts nameParts(docstring const & iname)
 119 {
 120         name_parts res;
 121         if (iname.empty())
 122                 return res;
 123
 124         // First we check for goupings (via {...}) and replace blanks and
 125         // commas inside groups with temporary placeholders
 126         docstring name;
 127         int gl = 0;
 128         docstring::const_iterator p = iname.begin();
 129         while (p != iname.end()) {
 130                 // count grouping level
 131                 if (*p == '{')
 132                         ++gl;
 133                 else if (*p == '}')
 134                         --gl;
 135                 // generate string with probable placeholders
 136                 if (*p == ' ' && gl > 0)
 137                         name += from_ascii("$$space!");
 138                 else if (*p == ',' && gl > 0)
 139                         name += from_ascii("$$comma!");
 140                 else
 141                         name += *p;
 142                 ++p;
 143         }
 144
 145         // Now we look for a comma, and take the last name to be everything
 146         // preceding the right-most one, so that we also get the name suffix
 147         // (aka "jr" part).
 148         vector<docstring> pieces = getVectorFromString(name);
 149         if (pieces.size() > 1) {
 150                 // Whether we have a name suffix or not, the prename is
 151                 // always last item
 152                 res.prename = renormalize(pieces.back());
 153                 // The family name, conversely, is always the first item.
 154                 // However, it might contain a prefix (aka "von" part)
 155                 docstring const sname = pieces.front();
 156                 res.prefix = renormalize(parseSurname(sname).first);
 157                 res.surname = renormalize(parseSurname(sname).second);
 158                 // If we have three pieces (the maximum allowed by BibTeX),
 159                 // the second one is the name suffix.
 160                 if (pieces.size() > 2)
 161                         res.suffix = renormalize(pieces.at(1));
 162                 return res;
 163         }
 164
 165         // OK, so now we want to look for the last name.
 166         // Split on spaces, to get various tokens.
 167         pieces = getVectorFromString(name, from_ascii(" "));
 168         // No space: Only a family name given
 169         if (pieces.size() < 2) {
 170                 res.surname = renormalize(pieces.back());
 171                 return res;
 172         }
 173         // If we get two pieces, assume "prename surname"
 174         if (pieces.size() == 2) {
 175                 res.prename = renormalize(pieces.front());
 176                 res.surname = renormalize(pieces.back());
 177                 return res;
 178         }
 179
 180         // More than 3 pieces: A name prefix (aka "von" part) might be included.
 181         // We look for the first piece that begins with a lower case letter
 182         // (which is the name prefix, if it is not the last token) or the last token.
 183         docstring prename;
 184         vector<docstring>::const_iterator it = pieces.begin();
 185         vector<docstring>::const_iterator const en = pieces.end();
 186         bool first = true;
 187         for (; it != en; ++it) {
 188                 if ((*it).empty())
 189                         continue;
 190                 char_type const c = (*it)[0];
 191                 // If the piece starts with a lower case char, we assume
 192                 // this is the name prefix and thus prename is complete.
 193                 if (isLower(c))
 194                         break;
 195                 // Same if this is the last piece, which is always the surname.
 196                 if (it + 1 == en)
 197                         break;
 198                 // Nothing of the former, so add this piece to the prename
 199                 if (!first)
 200                         prename += " ";
 201                 else
 202                         first = false;
 203                 prename += *it;
 204         }
 205
 206         // Now reconstruct the family name and strip the prefix.
 207         // Note that if we left the loop because it + 1 == en,
 208         // then this will still do the right thing, i.e., make surname
 209         // just be the last piece.
 210         docstring surname;
 211         first = true;
 212         for (; it != en; ++it) {
 213                 if (!first)
 214                         surname += " ";
 215                 else
 216                         first = false;
 217                 surname += *it;
 218         }
 219         res.prename = renormalize(prename);
 220         res.prefix = renormalize(parseSurname(surname).first);
 221         res.surname = renormalize(parseSurname(surname).second);
 222         return res;
 223 }
 224
 225
 226 docstring constructName(docstring const & name, string const scheme)
 227 {
 228         // re-constructs a name from name parts according
 229         // to a given scheme
 230         docstring const prename = nameParts(name).prename;
 231         docstring const surname = nameParts(name).surname;
 232         docstring const prefix = nameParts(name).prefix;
 233         docstring const suffix = nameParts(name).suffix;
 234         string res = scheme;
 235         static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 236         static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 237         static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 238         smatch sub;
 239         // Changing the first parameter of regex_match() may corrupt the
 240         // second one. In this case we use the temporary string tmp.
 241         if (regex_match(scheme, sub, reg1)) {
 242                 res = sub.str(1);
 243                 if (!prename.empty())
 244                         res += sub.str(3);
 245                 res += sub.str(5);
 246         }
 247         if (regex_match(res, sub, reg2)) {
 248                 string tmp = sub.str(1);
 249                 if (!suffix.empty())
 250                         tmp += sub.str(3);
 251                 res = tmp + sub.str(5);
 252         }
 253         if (regex_match(res, sub, reg3)) {
 254                 string tmp = sub.str(1);
 255                 if (!prefix.empty())
 256                         tmp += sub.str(3);
 257                 res = tmp + sub.str(5);
 258         }
 259         docstring result = from_ascii(res);
 260         result = subst(result, from_ascii("%prename%"), prename);
 261         result = subst(result, from_ascii("%surname%"), surname);
 262         result = subst(result, from_ascii("%prefix%"), prefix);
 263         result = subst(result, from_ascii("%suffix%"), suffix);
 264         return result;
 265 }
 266
 267
 268 vector<docstring> const getAuthors(docstring const & author)
 269 {
 270         // We check for goupings (via {...}) and only consider " and "
 271         // outside groups as author separator. This is to account
 272         // for cases such as {{Barnes and Noble, Inc.}}, which
 273         // need to be treated as one single family name.
 274         // We use temporary placeholders in order to differentiate the
 275         // diverse " and " cases.
 276
 277         // First, we temporarily replace all ampersands. It is rather unusual
 278         // in author names, but can happen (consider cases such as "C \& A Corp.").
 279         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
 280         // Then, we temporarily make all " and " strings to ampersands in order
 281         // to handle them later on a per-char level.
 282         iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
 283         // Now we traverse through the string and replace the "&" by the proper
 284         // output in- and outside groups
 285         docstring name;
 286         int gl = 0;
 287         docstring::const_iterator p = iname.begin();
 288         while (p != iname.end()) {
 289                 // count grouping level
 290                 if (*p == '{')
 291                         ++gl;
 292                 else if (*p == '}')
 293                         --gl;
 294                 // generate string with probable placeholders
 295                 if (*p == '&') {
 296                         if (gl > 0)
 297                                 // Inside groups, we output "and"
 298                                 name += from_ascii("and");
 299                         else
 300                                 // Outside groups, we output a separator
 301                                 name += from_ascii("$$namesep!");
 302                 }
 303                 else
 304                         name += *p;
 305                 ++p;
 306         }
 307
 308         // re-insert the literal ampersands
 309         name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
 310
 311         // Now construct the actual vector
 312         return getVectorFromString(name, from_ascii(" $$namesep! "));
 313 }
 314
 315
 316 bool multipleAuthors(docstring const author)
 317 {
 318         return getAuthors(author).size() > 1;
 319 }
 320
 321
 322 // converts a string containing LaTeX commands into unicode
 323 // for display.
 324 docstring convertLaTeXCommands(docstring const & str)
 325 {
 326         docstring val = str;
 327         docstring ret;
 328
 329         bool scanning_cmd = false;
 330         bool scanning_math = false;
 331         bool escaped = false; // used to catch \$, etc.
 332         while (!val.empty()) {
 333                 char_type const ch = val[0];
 334
 335                 // if we're scanning math, we output everything until we
 336                 // find an unescaped $, at which point we break out.
 337                 if (scanning_math) {
 338                         if (escaped)
 339                                 escaped = false;
 340                         else if (ch == '\\')
 341                                 escaped = true;
 342                         else if (ch == '$')
 343                                 scanning_math = false;
 344                         ret += ch;
 345                         val = val.substr(1);
 346                         continue;
 347                 }
 348
 349                 // if we're scanning a command name, then we just
 350                 // discard characters until we hit something that
 351                 // isn't alpha.
 352                 if (scanning_cmd) {
 353                         if (isAlphaASCII(ch)) {
 354                                 val = val.substr(1);
 355                                 escaped = false;
 356                                 continue;
 357                         }
 358                         // so we're done with this command.
 359                         // now we fall through and check this character.
 360                         scanning_cmd = false;
 361                 }
 362
 363                 // was the last character a \? If so, then this is something like:
 364                 // \\ or \$, so we'll just output it. That's probably not always right...
 365                 if (escaped) {
 366                         // exception: output \, as THIN SPACE
 367                         if (ch == ',')
 368                                 ret.push_back(0x2009);
 369                         else
 370                                 ret += ch;
 371                         val = val.substr(1);
 372                         escaped = false;
 373                         continue;
 374                 }
 375
 376                 if (ch == '$') {
 377                         ret += ch;
 378                         val = val.substr(1);
 379                         scanning_math = true;
 380                         continue;
 381                 }
 382
 383                 // Change text mode accents in the form
 384                 // {\v a} to \v{a} (see #9340).
 385                 // FIXME: This is a sort of mini-tex2lyx.
 386                 //        Use the real tex2lyx instead!
 387                 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
 388                 if (lyx::regex_search(to_utf8(val), tma_reg)) {
 389                         val = val.substr(1);
 390                         val.replace(2, 1, from_ascii("{"));
 391                         continue;
 392                 }
 393
 394                 // Apart from the above, we just ignore braces
 395                 if (ch == '{' || ch == '}') {
 396                         val = val.substr(1);
 397                         continue;
 398                 }
 399
 400                 // we're going to check things that look like commands, so if
 401                 // this doesn't, just output it.
 402                 if (ch != '\\') {
 403                         ret += ch;
 404                         val = val.substr(1);
 405                         continue;
 406                 }
 407
 408                 // ok, could be a command of some sort
 409                 // let's see if it corresponds to some unicode
 410                 // unicodesymbols has things in the form: \"{u},
 411                 // whereas we may see things like: \"u. So we'll
 412                 // look for that and change it, if necessary.
 413                 // FIXME: This is a sort of mini-tex2lyx.
 414                 //        Use the real tex2lyx instead!
 415                 static lyx::regex const reg("^\\\\\\W\\w");
 416                 if (lyx::regex_search(to_utf8(val), reg)) {
 417                         val.insert(3, from_ascii("}"));
 418                         val.insert(2, from_ascii("{"));
 419                 }
 420                 bool termination;
 421                 docstring rem;
 422                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 423                                 Encodings::TEXT_CMD, termination, rem);
 424                 if (!cnvtd.empty()) {
 425                         // it did, so we'll take that bit and proceed with what's left
 426                         ret += cnvtd;
 427                         val = rem;
 428                         continue;
 429                 }
 430                 // it's a command of some sort
 431                 scanning_cmd = true;
 432                 escaped = true;
 433                 val = val.substr(1);
 434         }
 435         return ret;
 436 }
 437
 438
 439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 440 docstring processRichtext(docstring const & str, bool richtext)
 441 {
 442         docstring val = str;
 443         docstring ret;
 444
 445         bool scanning_rich = false;
 446         while (!val.empty()) {
 447                 char_type const ch = val[0];
 448                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 449                         // beginning of rich text
 450                         scanning_rich = true;
 451                         val = val.substr(2);
 452                         continue;
 453                 }
 454                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 455                         // end of rich text
 456                         scanning_rich = false;
 457                         val = val.substr(2);
 458                         continue;
 459                 }
 460                 if (richtext) {
 461                         if (scanning_rich)
 462                                 ret += ch;
 463                         else {
 464                                 // we need to escape '<' and '>'
 465                                 if (ch == '<')
 466                                         ret += "&lt;";
 467                                 else if (ch == '>')
 468                                         ret += "&gt;";
 469                                 else
 470                                         ret += ch;
 471                         }
 472                 } else if (!scanning_rich /* && !richtext */)
 473                         ret += ch;
 474                 // else the character is discarded, which will happen only if
 475                 // richtext == false and we are scanning rich text
 476                 val = val.substr(1);
 477         }
 478         return ret;
 479 }
 480
 481 } // namespace
 482
 483
 484 //////////////////////////////////////////////////////////////////////
 485 //
 486 // BibTeXInfo
 487 //
 488 //////////////////////////////////////////////////////////////////////
 489
 490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 491         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
 492           modifier_(0)
 493 {}
 494
 495
 496
 497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
 498                                           bool full, bool forceshort) const
 499 {
 500         docstring author = operator[]("author");
 501         if (author.empty())
 502                 author = operator[]("editor");
 503
 504         return getAuthorList(buf, author, full, forceshort);
 505 }
 506
 507
 508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 509                 docstring const & author, bool const full, bool const forceshort,
 510                 bool const allnames, bool const beginning) const
 511 {
 512         // Maxnames treshold depend on engine
 513         size_t maxnames = buf ?
 514                 buf->params().documentClass().max_citenames() : 2;
 515
 516         if (!is_bibtex_) {
 517                 docstring const opt = label();
 518                 if (opt.empty())
 519                         return docstring();
 520
 521                 docstring authors;
 522                 docstring const remainder = trim(split(opt, authors, '('));
 523                 if (remainder.empty())
 524                         // in this case, we didn't find a "(",
 525                         // so we don't have author (year)
 526                         return docstring();
 527                 return authors;
 528         }
 529
 530         if (author.empty())
 531                 return author;
 532
 533         // OK, we've got some names. Let's format them.
 534         // Try to split the author list
 535         vector<docstring> const authors = getAuthors(author);
 536
 537         docstring retval;
 538
 539         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 540                                                : ENGINE_TYPE_DEFAULT;
 541
 542         // These are defined in the styles
 543         string const etal =
 544                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
 545                     : " et al.";
 546         string const namesep =
 547                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
 548                    : ", ";
 549         string const lastnamesep =
 550                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
 551                     : ", and ";
 552         string const pairnamesep =
 553                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
 554                      : " and ";
 555         string firstnameform =
 556                         buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
 557                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 558         if (!beginning)
 559                 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
 560                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 561         string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
 562                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 563         if (!beginning)
 564                 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
 565                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 566         string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
 567                              : "{%prefix%[[%prefix% ]]}%surname%";
 568
 569         // Shorten the list (with et al.) if forceshort is set
 570         // and the list can actually be shortened, else if maxcitenames
 571         // is passed and full is not set.
 572         bool shorten = forceshort && authors.size() > 1;
 573         vector<docstring>::const_iterator it = authors.begin();
 574         vector<docstring>::const_iterator en = authors.end();
 575         for (size_t i = 0; it != en; ++it, ++i) {
 576                 if (i >= maxnames && !full) {
 577                         shorten = true;
 578                         break;
 579                 }
 580                 if (*it == "others") {
 581                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 582                         break;
 583                 }
 584                 if (i > 0 && i == authors.size() - 1) {
 585                         if (authors.size() == 2)
 586                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 587                         else
 588                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 589                 } else if (i > 0)
 590                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 591                 if (allnames)
 592                         retval += (i == 0) ? constructName(*it, firstnameform)
 593                                 : constructName(*it, othernameform);
 594                 else
 595                         retval += constructName(*it, citenameform);
 596         }
 597         if (shorten) {
 598                 if (allnames)
 599                         retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 600                 else
 601                         retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 602         }
 603
 604         return convertLaTeXCommands(retval);
 605 }
 606
 607
 608 docstring const BibTeXInfo::getYear() const
 609 {
 610         if (is_bibtex_) {
 611                 // first try legacy year field
 612                 docstring year = operator[]("year");
 613                 if (!year.empty())
 614                         return year;
 615                 // now try biblatex's date field
 616                 year = operator[]("date");
 617                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 618                 // We only want the years.
 619                 static regex const yreg("[-]?([\\d]{4}).*");
 620                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 621                 smatch sm;
 622                 string const date = to_utf8(year);
 623                 if (!regex_match(date, sm, yreg))
 624                         // cannot parse year.
 625                         return docstring();
 626                 year = from_ascii(sm[1]);
 627                 // check for an endyear
 628                 if (regex_match(date, sm, ereg))
 629                         year += char_type(0x2013) + from_ascii(sm[1]);
 630                 return year;
 631         }
 632
 633         docstring const opt = label();
 634         if (opt.empty())
 635                 return docstring();
 636
 637         docstring authors;
 638         docstring tmp = split(opt, authors, '(');
 639         if (tmp.empty())
 640                 // we don't have author (year)
 641                 return docstring();
 642         docstring year;
 643         tmp = split(tmp, year, ')');
 644         return year;
 645 }
 646
 647
 648 namespace {
 649
 650 docstring parseOptions(docstring const & format, string & optkey,
 651                     docstring & ifpart, docstring & elsepart);
 652
 653 // Calls parseOptions to deal with an embedded option, such as:
 654 //   {%number%[[, no.~%number%]]}
 655 // which must appear at the start of format. ifelsepart gets the
 656 // whole of the option, and we return what's left after the option.
 657 // we return format if there is an error.
 658 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 659 {
 660         LASSERT(format[0] == '{' && format[1] == '%', return format);
 661         string optkey;
 662         docstring ifpart;
 663         docstring elsepart;
 664         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 665         if (format == rest) { // parse error
 666                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 667                 return format;
 668         }
 669         LASSERT(rest.size() <= format.size(),
 670                 { ifelsepart = docstring(); return format; });
 671         ifelsepart = format.substr(0, format.size() - rest.size());
 672         return rest;
 673 }
 674
 675
 676 // Gets a "clause" from a format string, where the clause is
 677 // delimited by '[[' and ']]'. Returns what is left after the
 678 // clause is removed, and returns format if there is an error.
 679 docstring getClause(docstring const & format, docstring & clause)
 680 {
 681         docstring fmt = format;
 682         // remove '[['
 683         fmt = fmt.substr(2);
 684         // we'll remove characters from the front of fmt as we
 685         // deal with them
 686         while (!fmt.empty()) {
 687                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 688                         // that's the end
 689                         fmt = fmt.substr(2);
 690                         break;
 691                 }
 692                 // check for an embedded option
 693                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 694                         docstring part;
 695                         docstring const rest = parseEmbeddedOption(fmt, part);
 696                         if (fmt == rest) {
 697                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 698                                 return format;
 699                         }
 700                         clause += part;
 701                         fmt = rest;
 702                 } else { // it's just a normal character
 703                                 clause += fmt[0];
 704                                 fmt = fmt.substr(1);
 705                 }
 706         }
 707         return fmt;
 708 }
 709
 710
 711 // parse an options string, which must appear at the start of the
 712 // format parameter. puts the parsed bits in optkey, ifpart, and
 713 // elsepart and returns what's left after the option is removed.
 714 // if there's an error, it returns format itself.
 715 docstring parseOptions(docstring const & format, string & optkey,
 716                     docstring & ifpart, docstring & elsepart)
 717 {
 718         LASSERT(format[0] == '{' && format[1] == '%', return format);
 719         // strip '{%'
 720         docstring fmt = format.substr(2);
 721         size_t pos = fmt.find('%'); // end of key
 722         if (pos == string::npos) {
 723                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 724                 return format;
 725         }
 726         optkey = to_utf8(fmt.substr(0, pos));
 727         fmt = fmt.substr(pos + 1);
 728         // [[format]] should be next
 729         if (fmt[0] != '[' || fmt[1] != '[') {
 730                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 731                 return format;
 732         }
 733
 734         docstring curfmt = fmt;
 735         fmt = getClause(curfmt, ifpart);
 736         if (fmt == curfmt) {
 737                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 738                 return format;
 739         }
 740
 741         if (fmt[0] == '}') // we're done, no else clause
 742                 return fmt.substr(1);
 743
 744         // else part should follow
 745         if (fmt[0] != '[' || fmt[1] != '[') {
 746                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 747                 return format;
 748         }
 749
 750         curfmt = fmt;
 751         fmt = getClause(curfmt, elsepart);
 752         // we should be done
 753         if (fmt == curfmt || fmt[0] != '}') {
 754                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 755                 return format;
 756         }
 757         return fmt.substr(1);
 758 }
 759
 760
 761 } // namespace
 762
 763 /* FIXME
 764 Bug #9131 revealed an oddity in how we are generating citation information
 765 when more than one key is given. We end up building a longer and longer format
 766 string as we go, which we then have to re-parse, over and over and over again,
 767 rather than generating the information for the individual keys and then putting
 768 all of that together. We do that to deal with the way separators work, from what
 769 I can tell, but it still feels like a hack. Fixing this would require quite a
 770 bit of work, however.
 771 */
// Expands the citation format string `format` for this entry and returns
// the resulting text. The format is consumed from the front, one construct
// at a time:
//   %key%            replaced by the value of key. Keys starting with '!'
//                    are macros whose definition (from the document class)
//                    is pushed back onto the front of the format and
//                    re-scanned; keys starting with "B_" or '_' are macros
//                    whose definition is passed through translateIfPossible;
//                    any other key is resolved with getValueForKey() and,
//                    outside of rich text, wrapped in
//                    {!<span class="bib-KEY">!} ... {!</span>!}.
//   {%key%[[a]][[b]]} conditional, parsed by parseOptions(): the if part is
//                    used when the key has a value (or for the special keys
//                    "next"/"second" when the matching flag is set),
//                    otherwise the else part, if there is one.
//   {! ... !}        rich text markers, copied through verbatim.
// \param xrefs   entries consulted by getValueForKey() for missing fields
// \param counter number of macro expansions done so far; it is incremented
//                here and the expansion is aborted once it exceeds
//                max_passes. Nested expansions of if/else parts start with
//                a fresh counter.
// \param next    enables the "next" conditional, whose if part is emitted
//                without being expanded
// \param second  enables the "second" conditional (not forwarded to the
//                nested expansions below)
// \return the expanded text, or the translated string "ERROR!" if the
//         format is malformed or the pass limit is hit.
docstring BibTeXInfo::expandFormat(docstring const & format,
                BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
                CiteItem const & ci, bool next, bool second) const
{
        // incorrect use of macros could put us in an infinite loop
        static int const max_passes = 5000;
        // the use of overly large keys can lead to performance problems, due
        // to eventual attempts to convert LaTeX macros to unicode. See bug
        // #8944. By default, the size is limited to 128 (in CiteItem), but
        // for specific purposes (such as XHTML export), it needs to be enlarged
        // This is perhaps not the best solution, but it will have to do for now.
        size_t const max_keysize = ci.max_key_size;
        odocstringstream ret; // return value
        string key;           // name of the key currently being scanned
        bool scanning_key = false;  // true between the two '%' of %key%
        bool scanning_rich = false; // true between "{!" and "!}"

        CiteEngineType const engine_type = buf.params().citeEngineType();
        docstring fmt = format;
        // we'll remove characters from the front of fmt as we
        // deal with them
        while (!fmt.empty()) {
                if (counter > max_passes) {
                        LYXERR0("Recursion limit reached while parsing `"
                                << format << "'.");
                        return _("ERROR!");
                }

                char_type thischar = fmt[0];
                if (thischar == '%') {
                        // beginning or end of key
                        if (scanning_key) {
                                // end of key
                                scanning_key = false;
                                // so we replace the key with its value, which may be empty
                                if (key[0] == '!') {
                                        // macro: splice its definition in place of the
                                        // closing '%' and scan it on the next iteration
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        fmt = from_utf8(val) + fmt.substr(1);
                                        counter += 1;
                                        continue;
                                } else if (prefixIs(key, "B_")) {
                                        // a translatable bit (to the Buffer language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val), buf.params().language->code());
                                        ret << trans;
                                } else if (key[0] == '_') {
                                        // a translatable bit (to the GUI language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val));
                                        ret << trans;
                                } else {
                                        // an ordinary key: emit its value, tagged with a
                                        // span unless we are already inside rich text
                                        docstring const val =
                                                getValueForKey(key, buf, ci, xrefs, max_keysize);
                                        if (!scanning_rich)
                                                ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
                                        ret << val;
                                        if (!scanning_rich)
                                                ret << from_ascii("{!</span>!}");
                                }
                        } else {
                                // beginning of key
                                key.clear();
                                scanning_key = true;
                        }
                }
                else if (thischar == '{') {
                        // beginning of option?
                        if (scanning_key) {
                                LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
                                return _("ERROR!");
                        }
                        if (fmt.size() > 1) {
                                if (fmt[1] == '%') {
                                        // it is the beginning of an optional format
                                        string optkey;
                                        docstring ifpart;
                                        docstring elsepart;
                                        docstring const newfmt =
                                                parseOptions(fmt, optkey, ifpart, elsepart);
                                        if (newfmt == fmt) // parse error
                                                return _("ERROR!");
                                        fmt = newfmt;
                                        docstring const val =
                                                getValueForKey(optkey, buf, ci, xrefs);
                                        // the branches below are tried in this order; the
                                        // nested expansions get their own pass counter
                                        if (optkey == "next" && next)
                                                ret << ifpart; // without expansion
                                        else if (optkey == "second" && second) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!val.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!elsepart.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(elsepart, xrefs, newcounter, buf,
                                                        ci, next);
                                        }
                                        // fmt will have been shortened for us already
                                        continue;
                                }
                                if (fmt[1] == '!') {
                                        // beginning of rich text
                                        scanning_rich = true;
                                        fmt = fmt.substr(2);
                                        ret << from_ascii("{!");
                                        continue;
                                }
                        }
                        // we are here if '{' was not followed by % or !.
                        // So it's just a character.
                        ret << thischar;
                }
                else if (scanning_rich && thischar == '!'
                         && fmt.size() > 1 && fmt[1] == '}') {
                        // end of rich text
                        scanning_rich = false;
                        fmt = fmt.substr(2);
                        ret << from_ascii("!}");
                        continue;
                }
                else if (scanning_key)
                        // collect the key name one character at a time
                        key += char(thischar);
                else {
                        // plain text: copy it over, dropping (but logging)
                        // characters the stream cannot encode
                        try {
                                ret.put(thischar);
                        } catch (EncodingException & /* e */) {
                                LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
                        }
                }
                // every branch that did not `continue` consumed one character
                fmt = fmt.substr(1);
        } // while loop
        if (scanning_key) {
                LYXERR0("Never found end of key in `" << format << "'!");
                return _("ERROR!");
        }
        if (scanning_rich) {
                LYXERR0("Never found end of rich text in `" << format << "'!");
                return _("ERROR!");
        }
        return ret.str();
}
 921
 922
 923 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 924         Buffer const & buf, CiteItem const & ci) const
 925 {
 926         bool const richtext = ci.richtext;
 927
 928         if (!richtext && !info_.empty())
 929                 return info_;
 930         if (richtext && !info_richtext_.empty())
 931                 return info_richtext_;
 932
 933         if (!is_bibtex_) {
 934                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 935                 info_ = it->second;
 936                 return info_;
 937         }
 938
 939         CiteEngineType const engine_type = buf.params().citeEngineType();
 940         DocumentClass const & dc = buf.params().documentClass();
 941         docstring const & format =
 942                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 943         int counter = 0;
 944         info_ = expandFormat(format, xrefs, counter, buf,
 945                 ci, false, false);
 946
 947         if (info_.empty()) {
 948                 // this probably shouldn't happen
 949                 return info_;
 950         }
 951
 952         if (richtext) {
 953                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 954                 return info_richtext_;
 955         }
 956
 957         info_ = convertLaTeXCommands(processRichtext(info_, false));
 958         return info_;
 959 }
 960
 961
 962 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 963         Buffer const & buf, docstring const & format,
 964         CiteItem const & ci, bool next, bool second) const
 965 {
 966         docstring loclabel;
 967
 968         int counter = 0;
 969         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 970
 971         if (!loclabel.empty() && !next) {
 972                 loclabel = processRichtext(loclabel, ci.richtext);
 973                 loclabel = convertLaTeXCommands(loclabel);
 974         }
 975
 976         return loclabel;
 977 }
 978
 979
 980 docstring const & BibTeXInfo::operator[](docstring const & field) const
 981 {
 982         BibTeXInfo::const_iterator it = find(field);
 983         if (it != end())
 984                 return it->second;
 985         static docstring const empty_value = docstring();
 986         return empty_value;
 987 }
 988
 989
 990 docstring const & BibTeXInfo::operator[](string const & field) const
 991 {
 992         return operator[](from_ascii(field));
 993 }
 994
 995
 996 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 997         CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
 998 {
 999         // anything less is pointless
1000         LASSERT(maxsize >= 16, maxsize = 16);
1001         string key = oldkey;
1002         bool cleanit = false;
1003         if (prefixIs(oldkey, "clean:")) {
1004                 key = oldkey.substr(6);
1005                 cleanit = true;
1006         }
1007
1008         docstring ret = operator[](key);
1009         if (ret.empty() && !xrefs.empty()) {
1010                 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1011                 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1012                 for (; it != en; ++it) {
1013                         if (*it && !(**it)[key].empty()) {
1014                                 ret = (**it)[key];
1015                                 break;
1016                         }
1017                 }
1018         }
1019         if (ret.empty()) {
1020                 // some special keys
1021                 // FIXME: dialog, textbefore and textafter have nothing to do with this
1022                 if (key == "dialog" && ci.context == CiteItem::Dialog)
1023                         ret = from_ascii("x"); // any non-empty string will do
1024                 else if (key == "export" && ci.context == CiteItem::Export)
1025                         ret = from_ascii("x"); // any non-empty string will do
1026                 else if (key == "ifstar" && ci.Starred)
1027                         ret = from_ascii("x"); // any non-empty string will do
1028                 else if (key == "ifqualified" && ci.isQualified)
1029                         ret = from_ascii("x"); // any non-empty string will do
1030                 else if (key == "entrytype")
1031                         ret = entry_type_;
1032                 else if (prefixIs(key, "ifentrytype:")
1033                          && from_ascii(key.substr(12)) == entry_type_)
1034                         ret = from_ascii("x"); // any non-empty string will do
1035                 else if (key == "key")
1036                         ret = bib_key_;
1037                 else if (key == "label")
1038                         ret = label_;
1039                 else if (key == "modifier" && modifier_ != 0)
1040                         ret = modifier_;
1041                 else if (key == "numericallabel")
1042                         ret = cite_number_;
1043                 else if (prefixIs(key, "ifmultiple:")) {
1044                         // Return whether we have multiple authors
1045                         docstring const kind = operator[](from_ascii(key.substr(11)));
1046                         if (multipleAuthors(kind))
1047                                 ret = from_ascii("x"); // any non-empty string will do
1048                 }
1049                 else if (prefixIs(key, "abbrvnames:")) {
1050                         // Special key to provide abbreviated name list,
1051                         // with respect to maxcitenames. Suitable for Bibliography
1052                         // beginnings.
1053                         docstring const kind = operator[](from_ascii(key.substr(11)));
1054                         ret = getAuthorList(&buf, kind, false, false, true);
1055                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1056                                 ret[0] = uppercase(ret[0]);
1057                 } else if (prefixIs(key, "fullnames:")) {
1058                         // Return a full name list. Suitable for Bibliography
1059                         // beginnings.
1060                         docstring const kind = operator[](from_ascii(key.substr(10)));
1061                         ret = getAuthorList(&buf, kind, true, false, true);
1062                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1063                                 ret[0] = uppercase(ret[0]);
1064                 } else if (prefixIs(key, "forceabbrvnames:")) {
1065                         // Special key to provide abbreviated name lists,
1066                         // irrespective of maxcitenames. Suitable for Bibliography
1067                         // beginnings.
1068                         docstring const kind = operator[](from_ascii(key.substr(15)));
1069                         ret = getAuthorList(&buf, kind, false, true, true);
1070                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1071                                 ret[0] = uppercase(ret[0]);
1072                 } else if (prefixIs(key, "abbrvbynames:")) {
1073                         // Special key to provide abbreviated name list,
1074                         // with respect to maxcitenames. Suitable for further names inside a
1075                         // bibliography item // (such as "ed. by ...")
1076                         docstring const kind = operator[](from_ascii(key.substr(11)));
1077                         ret = getAuthorList(&buf, kind, false, false, true, false);
1078                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1079                                 ret[0] = uppercase(ret[0]);
1080                 } else if (prefixIs(key, "fullbynames:")) {
1081                         // Return a full name list. Suitable for further names inside a
1082                         // bibliography item // (such as "ed. by ...")
1083                         docstring const kind = operator[](from_ascii(key.substr(10)));
1084                         ret = getAuthorList(&buf, kind, true, false, true, false);
1085                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1086                                 ret[0] = uppercase(ret[0]);
1087                 } else if (prefixIs(key, "forceabbrvbynames:")) {
1088                         // Special key to provide abbreviated name lists,
1089                         // irrespective of maxcitenames. Suitable for further names inside a
1090                         // bibliography item // (such as "ed. by ...")
1091                         docstring const kind = operator[](from_ascii(key.substr(15)));
1092                         ret = getAuthorList(&buf, kind, false, true, true, false);
1093                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1094                                 ret[0] = uppercase(ret[0]);
1095                 } else if (key == "abbrvciteauthor") {
1096                         // Special key to provide abbreviated author or
1097                         // editor names (suitable for citation labels),
1098                         // with respect to maxcitenames.
1099                         ret = getAuthorOrEditorList(&buf, false, false);
1100                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1101                                 ret[0] = uppercase(ret[0]);
1102                 } else if (key == "fullciteauthor") {
1103                         // Return a full author or editor list (for citation labels)
1104                         ret = getAuthorOrEditorList(&buf, true, false);
1105                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1106                                 ret[0] = uppercase(ret[0]);
1107                 } else if (key == "forceabbrvciteauthor") {
1108                         // Special key to provide abbreviated author or
1109                         // editor names (suitable for citation labels),
1110                         // irrespective of maxcitenames.
1111                         ret = getAuthorOrEditorList(&buf, false, true);
1112                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1113                                 ret[0] = uppercase(ret[0]);
1114                 } else if (key == "bibentry") {
1115                         // Special key to provide the full bibliography entry: see getInfo()
1116                         CiteEngineType const engine_type = buf.params().citeEngineType();
1117                         DocumentClass const & dc = buf.params().documentClass();
1118                         docstring const & format =
1119                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1120                         int counter = 0;
1121                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1122                 } else if (key == "textbefore")
1123                         ret = ci.textBefore;
1124                 else if (key == "textafter")
1125                         ret = ci.textAfter;
1126                 else if (key == "curpretext")
1127                         ret = ci.getPretexts()[bib_key_];
1128                 else if (key == "curposttext")
1129                         ret = ci.getPosttexts()[bib_key_];
1130                 else if (key == "year")
1131                         ret = getYear();
1132         }
1133
1134         if (cleanit)
1135                 ret = html::cleanAttr(ret);
1136
1137         // make sure it is not too big
1138         support::truncateWithEllipsis(ret, maxsize);
1139         return ret;
1140 }
1141
1142
1143 //////////////////////////////////////////////////////////////////////
1144 //
1145 // BiblioInfo
1146 //
1147 //////////////////////////////////////////////////////////////////////
1148
1149 namespace {
1150
1151 // A functor for use with sort, leading to case insensitive sorting
1152 class compareNoCase: public binary_function<docstring, docstring, bool>
1153 {
1154 public:
1155         bool operator()(docstring const & s1, docstring const & s2) const {
1156                 return compare_no_case(s1, s2) < 0;
1157         }
1158 };
1159
1160 } // namespace
1161
1162
1163 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1164 {
1165         vector<docstring> result;
1166         if (!data.isBibTeX())
1167                 return result;
1168         // Legacy crossref field. This is not nestable.
1169         if (!nested && !data["crossref"].empty()) {
1170                 docstring const xrefkey = data["crossref"];
1171                 result.push_back(xrefkey);
1172                 // However, check for nested xdatas
1173                 BiblioInfo::const_iterator it = find(xrefkey);
1174                 if (it != end()) {
1175                         BibTeXInfo const & xref = it->second;
1176                         vector<docstring> const nxdata = getXRefs(xref, true);
1177                         if (!nxdata.empty())
1178                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
1179                 }
1180         }
1181         // Biblatex's xdata field. Infinitely nestable.
1182         // XData field can consist of a comma-separated list of keys
1183         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1184         if (!xdatakeys.empty()) {
1185                 vector<docstring>::const_iterator xit = xdatakeys.begin();
1186                 vector<docstring>::const_iterator xen = xdatakeys.end();
1187                 for (; xit != xen; ++xit) {
1188                         docstring const xdatakey = *xit;
1189                         result.push_back(xdatakey);
1190                         BiblioInfo::const_iterator it = find(xdatakey);
1191                         if (it != end()) {
1192                                 BibTeXInfo const & xdata = it->second;
1193                                 vector<docstring> const nxdata = getXRefs(xdata, true);
1194                                 if (!nxdata.empty())
1195                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
1196                         }
1197                 }
1198         }
1199         return result;
1200 }
1201
1202
1203 vector<docstring> const BiblioInfo::getKeys() const
1204 {
1205         vector<docstring> bibkeys;
1206         BiblioInfo::const_iterator it  = begin();
1207         for (; it != end(); ++it)
1208                 bibkeys.push_back(it->first);
1209         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1210         return bibkeys;
1211 }
1212
1213
1214 vector<docstring> const BiblioInfo::getFields() const
1215 {
1216         vector<docstring> bibfields;
1217         set<docstring>::const_iterator it = field_names_.begin();
1218         set<docstring>::const_iterator end = field_names_.end();
1219         for (; it != end; ++it)
1220                 bibfields.push_back(*it);
1221         sort(bibfields.begin(), bibfields.end());
1222         return bibfields;
1223 }
1224
1225
1226 vector<docstring> const BiblioInfo::getEntries() const
1227 {
1228         vector<docstring> bibentries;
1229         set<docstring>::const_iterator it = entry_types_.begin();
1230         set<docstring>::const_iterator end = entry_types_.end();
1231         for (; it != end; ++it)
1232                 bibentries.push_back(*it);
1233         sort(bibentries.begin(), bibentries.end());
1234         return bibentries;
1235 }
1236
1237
1238 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1239 {
1240         BiblioInfo::const_iterator it = find(key);
1241         if (it == end())
1242                 return docstring();
1243         BibTeXInfo const & data = it->second;
1244         return data.getAuthorOrEditorList(&buf, false);
1245 }
1246
1247
1248 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1249 {
1250         BiblioInfo::const_iterator it = find(key);
1251         if (it == end())
1252                 return docstring();
1253         BibTeXInfo const & data = it->second;
1254         return data.citeNumber();
1255 }
1256
1257
1258 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1259 {
1260         BiblioInfo::const_iterator it = find(key);
1261         if (it == end())
1262                 return docstring();
1263         BibTeXInfo const & data = it->second;
1264         docstring year = data.getYear();
1265         if (year.empty()) {
1266                 // let's try the crossrefs
1267                 vector<docstring> const xrefs = getXRefs(data);
1268                 if (xrefs.empty())
1269                         // no luck
1270                         return docstring();
1271                 for (docstring const & xref : xrefs) {
1272                         BiblioInfo::const_iterator const xrefit = find(xref);
1273                         if (xrefit == end())
1274                                 continue;
1275                         BibTeXInfo const & xref_data = xrefit->second;
1276                         year = xref_data.getYear();
1277                         if (!year.empty())
1278                                 // success!
1279                                 break;
1280                 }
1281         }
1282         if (use_modifier && data.modifier() != 0)
1283                 year += data.modifier();
1284         return year;
1285 }
1286
1287
1288 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1289 {
1290         docstring const year = getYear(key, use_modifier);
1291         if (year.empty())
1292                 return buf.B_("No year");
1293         return year;
1294 }
1295
1296
1297 docstring const BiblioInfo::getInfo(docstring const & key,
1298         Buffer const & buf, CiteItem const & ci) const
1299 {
1300         BiblioInfo::const_iterator it = find(key);
1301         if (it == end())
1302                 return docstring(_("Bibliography entry not found!"));
1303         BibTeXInfo const & data = it->second;
1304         BibTeXInfoList xrefptrs;
1305         vector<docstring> const xrefs = getXRefs(data);
1306         for (docstring const & xref : getXRefs(data)) {
1307                 BiblioInfo::const_iterator const xrefit = find(xref);
1308                 if (xrefit != end())
1309                         xrefptrs.push_back(&(xrefit->second));
1310         }
1311         return data.getInfo(xrefptrs, buf, ci);
1312 }
1313
1314
1315 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1316         Buffer const & buf, string const & style, CiteItem const & ci) const
1317 {
1318         size_t max_size = ci.max_size;
1319         // shorter makes no sense
1320         LASSERT(max_size >= 16, max_size = 16);
1321
1322         // we can't display more than 10 of these, anyway
1323         bool const too_many_keys = keys.size() > 10;
1324         if (too_many_keys)
1325                 keys.resize(10);
1326
1327         CiteEngineType const engine_type = buf.params().citeEngineType();
1328         DocumentClass const & dc = buf.params().documentClass();
1329         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1330         docstring ret = format;
1331         vector<docstring>::const_iterator key = keys.begin();
1332         vector<docstring>::const_iterator ken = keys.end();
1333         for (int i = 0; key != ken; ++key, ++i) {
1334                 BiblioInfo::const_iterator it = find(*key);
1335                 BibTeXInfo empty_data;
1336                 empty_data.key(*key);
1337                 BibTeXInfo & data = empty_data;
1338                 vector<BibTeXInfo const *> xrefptrs;
1339                 if (it != end()) {
1340                         data = it->second;
1341                         for (docstring const & xref : getXRefs(data)) {
1342                                 BiblioInfo::const_iterator const xrefit = find(xref);
1343                                 if (xrefit != end())
1344                                         xrefptrs.push_back(&(xrefit->second));
1345                         }
1346                 }
1347                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1348         }
1349
1350         if (too_many_keys)
1351                 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1352         support::truncateWithEllipsis(ret, max_size);
1353         return ret;
1354 }
1355
1356
1357 bool BiblioInfo::isBibtex(docstring const & key) const
1358 {
1359         docstring key1;
1360         split(key, key1, ',');
1361         BiblioInfo::const_iterator it = find(key1);
1362         if (it == end())
1363                 return false;
1364         return it->second.isBibTeX();
1365 }
1366
1367
1368 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1369         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1370         Buffer const & buf, CiteItem const & ci) const
1371 {
1372         if (empty())
1373                 return vector<pair<docstring,docstring>>();
1374
1375         string style;
1376         CiteStringMap csm(styles.size());
1377         for (size_t i = 0; i != csm.size(); ++i) {
1378                 style = styles[i].name;
1379                 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1380         }
1381
1382         return csm;
1383 }
1384
1385
1386 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1387 {
1388         bimap_.insert(info.begin(), info.end());
1389         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1390         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1391 }
1392
1393
1394 namespace {
1395
1396 // used in xhtml to sort a list of BibTeXInfo objects
1397 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1398 {
1399         docstring const lauth = lhs->getAuthorOrEditorList();
1400         docstring const rauth = rhs->getAuthorOrEditorList();
1401         docstring const lyear = lhs->getYear();
1402         docstring const ryear = rhs->getYear();
1403         docstring const ltitl = lhs->operator[]("title");
1404         docstring const rtitl = rhs->operator[]("title");
1405         return  (lauth < rauth)
1406                 || (lauth == rauth && lyear < ryear)
1407                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1408 }
1409
1410 } // namespace
1411
1412
1413 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1414 {
1415         cited_entries_.clear();
1416         // We are going to collect all the citation keys used in the document,
1417         // getting them from the TOC.
1418         // FIXME We may want to collect these differently, in the first case,
1419         // so that we might have them in order of appearance.
1420         set<docstring> citekeys;
1421         shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1422         Toc::const_iterator it = toc->begin();
1423         Toc::const_iterator const en = toc->end();
1424         for (; it != en; ++it) {
1425                 if (it->str().empty())
1426                         continue;
1427                 vector<docstring> const keys = getVectorFromString(it->str());
1428                 citekeys.insert(keys.begin(), keys.end());
1429         }
1430         if (citekeys.empty())
1431                 return;
1432
1433         // We have a set of the keys used in this document.
1434         // We will now convert it to a list of the BibTeXInfo objects used in
1435         // this document...
1436         vector<BibTeXInfo const *> bi;
1437         set<docstring>::const_iterator cit = citekeys.begin();
1438         set<docstring>::const_iterator const cen = citekeys.end();
1439         for (; cit != cen; ++cit) {
1440                 BiblioInfo::const_iterator const bt = find(*cit);
1441                 if (bt == end() || !bt->second.isBibTeX())
1442                         continue;
1443                 bi.push_back(&(bt->second));
1444         }
1445         // ...and sort it.
1446         sort(bi.begin(), bi.end(), lSorter);
1447
1448         // Now we can write the sorted keys
1449         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1450         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1451         for (; bit != ben; ++bit)
1452                 cited_entries_.push_back((*bit)->key());
1453 }
1454
1455
1456 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1457 {
1458         collectCitedEntries(buf);
1459         CiteEngineType const engine_type = buf.params().citeEngineType();
1460         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1461
1462         int keynumber = 0;
1463         char modifier = 0;
1464         // used to remember the last one we saw
1465         // we'll be comparing entries to see if we need to add
1466         // modifiers, like "1984a"
1467         map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1468
1469         vector<docstring>::const_iterator it = cited_entries_.begin();
1470         vector<docstring>::const_iterator const en = cited_entries_.end();
1471         for (; it != en; ++it) {
1472                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1473                 // this shouldn't happen, but...
1474                 if (biit == bimap_.end())
1475                         // ...fail gracefully, anyway.
1476                         continue;
1477                 BibTeXInfo & entry = biit->second;
1478                 if (numbers) {
1479                         docstring const num = convert<docstring>(++keynumber);
1480                         entry.setCiteNumber(num);
1481                 } else {
1482                         // The first test here is checking whether this is the first
1483                         // time through the loop. If so, then we do not have anything
1484                         // with which to compare.
1485                         if (last != bimap_.end()
1486                             && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1487                             // we access the year via getYear() so as to get it from the xref,
1488                             // if we need to do so
1489                             && getYear(entry.key()) == getYear(last->second.key())) {
1490                                 if (modifier == 0) {
1491                                         // so the last one should have been 'a'
1492                                         last->second.setModifier('a');
1493                                         modifier = 'b';
1494                                 } else if (modifier == 'z')
1495                                         modifier = 'A';
1496                                 else
1497                                         modifier++;
1498                         } else {
1499                                 modifier = 0;
1500                         }
1501                         entry.setModifier(modifier);
1502                         // remember the last one
1503                         last = biit;
1504                 }
1505         }
1506         // Set the labels
1507         it = cited_entries_.begin();
1508         for (; it != en; ++it) {
1509                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1510                 // this shouldn't happen, but...
1511                 if (biit == bimap_.end())
1512                         // ...fail gracefully, anyway.
1513                         continue;
1514                 BibTeXInfo & entry = biit->second;
1515                 if (numbers) {
1516                         entry.label(entry.citeNumber());
1517                 } else {
1518                         docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1519                         // we do it this way so as to access the xref, if necessary
1520                         // note that this also gives us the modifier
1521                         docstring const year = getYear(*it, buf, true);
1522                         if (!auth.empty() && !year.empty())
1523                                 entry.label(auth + ' ' + year);
1524                         else
1525                                 entry.label(entry.key());
1526                 }
1527         }
1528 }
1529
1530
1531 //////////////////////////////////////////////////////////////////////
1532 //
1533 // CitationStyle
1534 //
1535 //////////////////////////////////////////////////////////////////////
1536
1537
1538 CitationStyle citationStyleFromString(string const & command,
1539                                       BufferParams const & params)
1540 {
1541         CitationStyle cs;
1542         if (command.empty())
1543                 return cs;
1544
1545         string const alias = params.getCiteAlias(command);
1546         string cmd = alias.empty() ? command : alias;
1547         if (isUpperCase(command[0])) {
1548                 cs.forceUpperCase = true;
1549                 cmd[0] = lowercase(cmd[0]);
1550         }
1551
1552         size_t const n = command.size() - 1;
1553         if (command[n] == '*') {
1554                 cs.hasStarredVersion = true;
1555                 if (suffixIs(cmd, '*'))
1556                         cmd = cmd.substr(0, cmd.size() - 1);
1557         }
1558
1559         cs.name = cmd;
1560         return cs;
1561 }
1562
1563
1564 string citationStyleToString(const CitationStyle & cs, bool const latex)
1565 {
1566         string cmd = latex ? cs.cmd : cs.name;
1567         if (cs.forceUpperCase)
1568                 cmd[0] = uppercase(cmd[0]);
1569         if (cs.hasStarredVersion)
1570                 cmd += '*';
1571         return cmd;
1572 }
1573
1574 } // namespace lyx