src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  * \author Julien Rioux
  10  * \author Jürgen Spitzmüller
  11  *
  12  * Full author contact details are available in file CREDITS.
  13  */
  14
  15 #include <config.h>
  16
  17 #include "BiblioInfo.h"
  18 #include "Buffer.h"
  19 #include "BufferParams.h"
  20 #include "buffer_funcs.h"
  21 #include "Citation.h"
  22 #include "Encoding.h"
  23 #include "InsetIterator.h"
  24 #include "Language.h"
  25 #include "xml.h"
  26 #include "Paragraph.h"
  27 #include "TextClass.h"
  28 #include "TocBackend.h"
  29
  30 #include "support/convert.h"
  31 #include "support/debug.h"
  32 #include "support/docstream.h"
  33 #include "support/gettext.h"
  34 #include "support/lassert.h"
  35 #include "support/lstrings.h"
  36 #include "support/regex.h"
  37 #include "support/textutils.h"
  38
  39 #include <map>
  40 #include <set>
  41
  42 using namespace std;
  43 using namespace lyx::support;
  44
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 // Remove placeholders from names
  51 docstring renormalize(docstring const & input)
  52 {
  53         docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
  54         return subst(res, from_ascii("$$comma!"), from_ascii(","));
  55 }
  56
  57
  58 // Split the surname into prefix ("von-part") and family name
  59 pair<docstring, docstring> parseSurname(docstring const & sname)
  60 {
  61         // Split the surname into its tokens
  62         vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
  63         if (pieces.size() < 2)
  64                 return make_pair(docstring(), sname);
  65
  66         // Now we look for pieces that begin with a lower case letter.
  67         // All except for the very last token constitute the "von-part".
  68         docstring prefix;
  69         vector<docstring>::const_iterator it = pieces.begin();
  70         vector<docstring>::const_iterator const en = pieces.end();
  71         bool first = true;
  72         for (; it != en; ++it) {
  73                 if ((*it).empty())
  74                         continue;
  75                 // If this is the last piece, then what we now have is
  76                 // the family name, notwithstanding the casing.
  77                 if (it + 1 == en)
  78                         break;
  79                 char_type const c = (*it)[0];
  80                 // If the piece starts with a upper case char, we assume
  81                 // this is part of the surname.
  82                 if (!isLower(c))
  83                         break;
  84                 // Nothing of the former, so add this piece to the prename
  85                 if (!first)
  86                         prefix += " ";
  87                 else
  88                         first = false;
  89                 prefix += *it;
  90         }
  91
  92         // Reconstruct the family name.
  93         // Note that if we left the loop with because it + 1 == en,
  94         // then this will still do the right thing, i.e., make surname
  95         // just be the last piece.
  96         docstring surname;
  97         first = true;
  98         for (; it != en; ++it) {
  99                 if (!first)
 100                         surname += " ";
 101                 else
 102                         first = false;
 103                 surname += *it;
 104         }
 105         return make_pair(prefix, surname);
 106 }
 107
 108
 109 struct name_parts {
 110         docstring surname;
 111         docstring prename;
 112         docstring suffix;
 113         docstring prefix;
 114 };
 115
 116
 117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
 118 name_parts nameParts(docstring const & iname)
 119 {
 120         name_parts res;
 121         if (iname.empty())
 122                 return res;
 123
 124         // First we check for goupings (via {...}) and replace blanks and
 125         // commas inside groups with temporary placeholders
 126         docstring name;
 127         int gl = 0;
 128         docstring::const_iterator p = iname.begin();
 129         while (p != iname.end()) {
 130                 // count grouping level
 131                 if (*p == '{')
 132                         ++gl;
 133                 else if (*p == '}')
 134                         --gl;
 135                 // generate string with probable placeholders
 136                 if (*p == ' ' && gl > 0)
 137                         name += from_ascii("$$space!");
 138                 else if (*p == ',' && gl > 0)
 139                         name += from_ascii("$$comma!");
 140                 else
 141                         name += *p;
 142                 ++p;
 143         }
 144
 145         // Now we look for a comma, and take the last name to be everything
 146         // preceding the right-most one, so that we also get the name suffix
 147         // (aka "jr" part).
 148         vector<docstring> pieces = getVectorFromString(name);
 149         if (pieces.size() > 1) {
 150                 // Whether we have a name suffix or not, the prename is
 151                 // always last item
 152                 res.prename = renormalize(pieces.back());
 153                 // The family name, conversely, is always the first item.
 154                 // However, it might contain a prefix (aka "von" part)
 155                 docstring const sname = pieces.front();
 156                 res.prefix = renormalize(parseSurname(sname).first);
 157                 res.surname = renormalize(parseSurname(sname).second);
 158                 // If we have three pieces (the maximum allowed by BibTeX),
 159                 // the second one is the name suffix.
 160                 if (pieces.size() > 2)
 161                         res.suffix = renormalize(pieces.at(1));
 162                 return res;
 163         }
 164
 165         // OK, so now we want to look for the last name.
 166         // Split on spaces, to get various tokens.
 167         pieces = getVectorFromString(name, from_ascii(" "));
 168         // No space: Only a family name given
 169         if (pieces.size() < 2) {
 170                 res.surname = renormalize(pieces.back());
 171                 return res;
 172         }
 173         // If we get two pieces, assume "prename surname"
 174         if (pieces.size() == 2) {
 175                 res.prename = renormalize(pieces.front());
 176                 res.surname = renormalize(pieces.back());
 177                 return res;
 178         }
 179
 180         // More than 3 pieces: A name prefix (aka "von" part) might be included.
 181         // We look for the first piece that begins with a lower case letter
 182         // (which is the name prefix, if it is not the last token) or the last token.
 183         docstring prename;
 184         vector<docstring>::const_iterator it = pieces.begin();
 185         vector<docstring>::const_iterator const en = pieces.end();
 186         bool first = true;
 187         for (; it != en; ++it) {
 188                 if ((*it).empty())
 189                         continue;
 190                 char_type const c = (*it)[0];
 191                 // If the piece starts with a lower case char, we assume
 192                 // this is the name prefix and thus prename is complete.
 193                 if (isLower(c))
 194                         break;
 195                 // Same if this is the last piece, which is always the surname.
 196                 if (it + 1 == en)
 197                         break;
 198                 // Nothing of the former, so add this piece to the prename
 199                 if (!first)
 200                         prename += " ";
 201                 else
 202                         first = false;
 203                 prename += *it;
 204         }
 205
 206         // Now reconstruct the family name and strip the prefix.
 207         // Note that if we left the loop because it + 1 == en,
 208         // then this will still do the right thing, i.e., make surname
 209         // just be the last piece.
 210         docstring surname;
 211         first = true;
 212         for (; it != en; ++it) {
 213                 if (!first)
 214                         surname += " ";
 215                 else
 216                         first = false;
 217                 surname += *it;
 218         }
 219         res.prename = renormalize(prename);
 220         res.prefix = renormalize(parseSurname(surname).first);
 221         res.surname = renormalize(parseSurname(surname).second);
 222         return res;
 223 }
 224
 225
 226 docstring constructName(docstring const & name, string const & scheme)
 227 {
 228         // re-constructs a name from name parts according
 229         // to a given scheme
 230         docstring const prename = nameParts(name).prename;
 231         docstring const surname = nameParts(name).surname;
 232         docstring const prefix = nameParts(name).prefix;
 233         docstring const suffix = nameParts(name).suffix;
 234         string res = scheme;
 235         static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 236         static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 237         static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 238         smatch sub;
 239         // Changing the first parameter of regex_match() may corrupt the
 240         // second one. In this case we use the temporary string tmp.
 241         if (regex_match(scheme, sub, reg1)) {
 242                 res = sub.str(1);
 243                 if (!prename.empty())
 244                         res += sub.str(3);
 245                 res += sub.str(5);
 246         }
 247         if (regex_match(res, sub, reg2)) {
 248                 string tmp = sub.str(1);
 249                 if (!suffix.empty())
 250                         tmp += sub.str(3);
 251                 res = tmp + sub.str(5);
 252         }
 253         if (regex_match(res, sub, reg3)) {
 254                 string tmp = sub.str(1);
 255                 if (!prefix.empty())
 256                         tmp += sub.str(3);
 257                 res = tmp + sub.str(5);
 258         }
 259         docstring result = from_ascii(res);
 260         result = subst(result, from_ascii("%prename%"), prename);
 261         result = subst(result, from_ascii("%surname%"), surname);
 262         result = subst(result, from_ascii("%prefix%"), prefix);
 263         result = subst(result, from_ascii("%suffix%"), suffix);
 264         return result;
 265 }
 266
 267
 268 vector<docstring> const getAuthors(docstring const & author)
 269 {
 270         // We check for goupings (via {...}) and only consider " and "
 271         // outside groups as author separator. This is to account
 272         // for cases such as {{Barnes and Noble, Inc.}}, which
 273         // need to be treated as one single family name.
 274         // We use temporary placeholders in order to differentiate the
 275         // diverse " and " cases.
 276
 277         // First, we temporarily replace all ampersands. It is rather unusual
 278         // in author names, but can happen (consider cases such as "C \& A Corp.").
 279         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
 280         // Then, we temporarily make all " and " strings to ampersands in order
 281         // to handle them later on a per-char level.
 282         iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
 283         // Now we traverse through the string and replace the "&" by the proper
 284         // output in- and outside groups
 285         docstring name;
 286         int gl = 0;
 287         docstring::const_iterator p = iname.begin();
 288         while (p != iname.end()) {
 289                 // count grouping level
 290                 if (*p == '{')
 291                         ++gl;
 292                 else if (*p == '}')
 293                         --gl;
 294                 // generate string with probable placeholders
 295                 if (*p == '&') {
 296                         if (gl > 0)
 297                                 // Inside groups, we output "and"
 298                                 name += from_ascii("and");
 299                         else
 300                                 // Outside groups, we output a separator
 301                                 name += from_ascii("$$namesep!");
 302                 }
 303                 else
 304                         name += *p;
 305                 ++p;
 306         }
 307
 308         // re-insert the literal ampersands
 309         name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
 310
 311         // Now construct the actual vector
 312         return getVectorFromString(name, from_ascii(" $$namesep! "));
 313 }
 314
 315
 316 bool multipleAuthors(docstring const & author)
 317 {
 318         return getAuthors(author).size() > 1;
 319 }
 320
 321
 322 // converts a string containing LaTeX commands into unicode
 323 // for display.
 324 docstring convertLaTeXCommands(docstring const & str)
 325 {
 326         docstring val = str;
 327         docstring ret;
 328
 329         bool scanning_cmd = false;
 330         bool scanning_math = false;
 331         bool escaped = false; // used to catch \$, etc.
 332         while (!val.empty()) {
 333                 char_type const ch = val[0];
 334
 335                 // if we're scanning math, we output everything until we
 336                 // find an unescaped $, at which point we break out.
 337                 if (scanning_math) {
 338                         if (escaped)
 339                                 escaped = false;
 340                         else if (ch == '\\')
 341                                 escaped = true;
 342                         else if (ch == '$')
 343                                 scanning_math = false;
 344                         ret += ch;
 345                         val = val.substr(1);
 346                         continue;
 347                 }
 348
 349                 // if we're scanning a command name, then we just
 350                 // discard characters until we hit something that
 351                 // isn't alpha.
 352                 if (scanning_cmd) {
 353                         if (isAlphaASCII(ch)) {
 354                                 val = val.substr(1);
 355                                 escaped = false;
 356                                 continue;
 357                         }
 358                         // so we're done with this command.
 359                         // now we fall through and check this character.
 360                         scanning_cmd = false;
 361                 }
 362
 363                 // was the last character a \? If so, then this is something like:
 364                 // \\ or \$, so we'll just output it. That's probably not always right...
 365                 if (escaped) {
 366                         // exception: output \, as THIN SPACE
 367                         if (ch == ',')
 368                                 ret.push_back(0x2009);
 369                         else
 370                                 ret += ch;
 371                         val = val.substr(1);
 372                         escaped = false;
 373                         continue;
 374                 }
 375
 376                 if (ch == '$') {
 377                         ret += ch;
 378                         val = val.substr(1);
 379                         scanning_math = true;
 380                         continue;
 381                 }
 382
 383                 // Change text mode accents in the form
 384                 // {\v a} to \v{a} (see #9340).
 385                 // FIXME: This is a sort of mini-tex2lyx.
 386                 //        Use the real tex2lyx instead!
 387                 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
 388                 if (lyx::regex_search(to_utf8(val), tma_reg)) {
 389                         val = val.substr(1);
 390                         val.replace(2, 1, from_ascii("{"));
 391                         continue;
 392                 }
 393
 394                 // Apart from the above, we just ignore braces
 395                 if (ch == '{' || ch == '}') {
 396                         val = val.substr(1);
 397                         continue;
 398                 }
 399
 400                 // we're going to check things that look like commands, so if
 401                 // this doesn't, just output it.
 402                 if (ch != '\\') {
 403                         ret += ch;
 404                         val = val.substr(1);
 405                         continue;
 406                 }
 407
 408                 // ok, could be a command of some sort
 409                 // let's see if it corresponds to some unicode
 410                 // unicodesymbols has things in the form: \"{u},
 411                 // whereas we may see things like: \"u. So we'll
 412                 // look for that and change it, if necessary.
 413                 // FIXME: This is a sort of mini-tex2lyx.
 414                 //        Use the real tex2lyx instead!
 415                 static lyx::regex const reg("^\\\\\\W\\w");
 416                 if (lyx::regex_search(to_utf8(val), reg)) {
 417                         val.insert(3, from_ascii("}"));
 418                         val.insert(2, from_ascii("{"));
 419                 }
 420                 bool termination;
 421                 docstring rem;
 422                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 423                                 Encodings::TEXT_CMD, termination, rem);
 424                 if (!cnvtd.empty()) {
 425                         // it did, so we'll take that bit and proceed with what's left
 426                         ret += cnvtd;
 427                         val = rem;
 428                         continue;
 429                 }
 430                 // it's a command of some sort
 431                 scanning_cmd = true;
 432                 escaped = true;
 433                 val = val.substr(1);
 434         }
 435         return ret;
 436 }
 437
 438
 439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 440 docstring processRichtext(docstring const & str, bool richtext)
 441 {
 442         docstring val = str;
 443         docstring ret;
 444
 445         bool scanning_rich = false;
 446         while (!val.empty()) {
 447                 char_type const ch = val[0];
 448                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 449                         // beginning of rich text
 450                         scanning_rich = true;
 451                         val = val.substr(2);
 452                         continue;
 453                 }
 454                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 455                         // end of rich text
 456                         scanning_rich = false;
 457                         val = val.substr(2);
 458                         continue;
 459                 }
 460                 if (richtext) {
 461                         if (scanning_rich)
 462                                 ret += ch;
 463                         else {
 464                                 // we need to escape '<' and '>'
 465                                 if (ch == '<')
 466                                         ret += "&lt;";
 467                                 else if (ch == '>')
 468                                         ret += "&gt;";
 469                                 else
 470                                         ret += ch;
 471                         }
 472                 } else if (!scanning_rich /* && !richtext */)
 473                         ret += ch;
 474                 // else the character is discarded, which will happen only if
 475                 // richtext == false and we are scanning rich text
 476                 val = val.substr(1);
 477         }
 478         return ret;
 479 }
 480
 481 } // namespace
 482
 483
 484 //////////////////////////////////////////////////////////////////////
 485 //
 486 // BibTeXInfo
 487 //
 488 //////////////////////////////////////////////////////////////////////
 489
 490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 491         : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type), info_(),
 492           modifier_(0)
 493 {}
 494
 495
 496
 497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
 498                                           bool full, bool forceshort) const
 499 {
 500         docstring author = operator[]("author");
 501         if (author.empty())
 502                 author = operator[]("editor");
 503
 504         return getAuthorList(buf, author, full, forceshort);
 505 }
 506
 507
 508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 509                 docstring const & author, bool const full, bool const forceshort,
 510                 bool const allnames, bool const beginning) const
 511 {
 512         // Maxnames treshold depend on engine
 513         size_t maxnames = buf ?
 514                 buf->params().documentClass().max_citenames() : 2;
 515
 516         if (!is_bibtex_) {
 517                 docstring const opt = label();
 518                 if (opt.empty())
 519                         return docstring();
 520
 521                 docstring authors;
 522                 docstring const remainder = trim(split(opt, authors, '('));
 523                 if (remainder.empty())
 524                         // in this case, we didn't find a "(",
 525                         // so we don't have author (year)
 526                         return docstring();
 527                 if (full) {
 528                         // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
 529                         docstring const fullauthors = trim(rsplit(remainder, ')'));
 530                         if (!fullauthors.empty())
 531                                 return fullauthors;
 532                 }
 533                 return authors;
 534         }
 535
 536         if (author.empty())
 537                 return author;
 538
 539         // OK, we've got some names. Let's format them.
 540         // Try to split the author list
 541         vector<docstring> const authors = getAuthors(author);
 542
 543         docstring retval;
 544
 545         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 546                                                : ENGINE_TYPE_DEFAULT;
 547
 548         // These are defined in the styles
 549         string const etal =
 550                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
 551                     : " et al.";
 552         string const namesep =
 553                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
 554                    : ", ";
 555         string const lastnamesep =
 556                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
 557                     : ", and ";
 558         string const pairnamesep =
 559                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
 560                      : " and ";
 561         string firstnameform =
 562                         buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
 563                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 564         if (!beginning)
 565                 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
 566                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 567         string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
 568                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 569         if (!beginning)
 570                 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
 571                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 572         string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
 573                              : "{%prefix%[[%prefix% ]]}%surname%";
 574
 575         // Shorten the list (with et al.) if forceshort is set
 576         // and the list can actually be shortened, else if maxcitenames
 577         // is passed and full is not set.
 578         bool shorten = forceshort && authors.size() > 1;
 579         vector<docstring>::const_iterator it = authors.begin();
 580         vector<docstring>::const_iterator en = authors.end();
 581         for (size_t i = 0; it != en; ++it, ++i) {
 582                 if (i >= maxnames && !full) {
 583                         shorten = true;
 584                         break;
 585                 }
 586                 if (*it == "others") {
 587                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 588                         break;
 589                 }
 590                 if (i > 0 && i == authors.size() - 1) {
 591                         if (authors.size() == 2)
 592                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 593                         else
 594                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 595                 } else if (i > 0)
 596                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 597                 if (allnames)
 598                         retval += (i == 0) ? constructName(*it, firstnameform)
 599                                 : constructName(*it, othernameform);
 600                 else
 601                         retval += constructName(*it, citenameform);
 602         }
 603         if (shorten) {
 604                 if (allnames)
 605                         retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 606                 else
 607                         retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 608         }
 609
 610         return convertLaTeXCommands(retval);
 611 }
 612
 613
 614 docstring const BibTeXInfo::getYear() const
 615 {
 616         if (is_bibtex_) {
 617                 // first try legacy year field
 618                 docstring year = operator[]("year");
 619                 if (!year.empty())
 620                         return year;
 621                 // now try biblatex's date field
 622                 year = operator[]("date");
 623                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 624                 // We only want the years.
 625                 static regex const yreg("[-]?([\\d]{4}).*");
 626                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 627                 smatch sm;
 628                 string const date = to_utf8(year);
 629                 if (!regex_match(date, sm, yreg))
 630                         // cannot parse year.
 631                         return docstring();
 632                 year = from_ascii(sm[1]);
 633                 // check for an endyear
 634                 if (regex_match(date, sm, ereg))
 635                         year += char_type(0x2013) + from_ascii(sm[1]);
 636                 return year;
 637         }
 638
 639         docstring const opt = label();
 640         if (opt.empty())
 641                 return docstring();
 642
 643         docstring authors;
 644         docstring tmp = split(opt, authors, '(');
 645         if (tmp.empty())
 646                 // we don't have author (year)
 647                 return docstring();
 648         docstring year;
 649         tmp = split(tmp, year, ')');
 650         return year;
 651 }
 652
 653
 654 namespace {
 655
 656 docstring parseOptions(docstring const & format, string & optkey,
 657                     docstring & ifpart, docstring & elsepart);
 658
 659 // Calls parseOptions to deal with an embedded option, such as:
 660 //   {%number%[[, no.~%number%]]}
 661 // which must appear at the start of format. ifelsepart gets the
 662 // whole of the option, and we return what's left after the option.
 663 // we return format if there is an error.
 664 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 665 {
 666         LASSERT(format[0] == '{' && format[1] == '%', return format);
 667         string optkey;
 668         docstring ifpart;
 669         docstring elsepart;
 670         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 671         if (format == rest) { // parse error
 672                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 673                 return format;
 674         }
 675         LASSERT(rest.size() <= format.size(),
 676                 { ifelsepart = docstring(); return format; });
 677         ifelsepart = format.substr(0, format.size() - rest.size());
 678         return rest;
 679 }
 680
 681
 682 // Gets a "clause" from a format string, where the clause is
 683 // delimited by '[[' and ']]'. Returns what is left after the
 684 // clause is removed, and returns format if there is an error.
 685 docstring getClause(docstring const & format, docstring & clause)
 686 {
 687         docstring fmt = format;
 688         // remove '[['
 689         fmt = fmt.substr(2);
 690         // we'll remove characters from the front of fmt as we
 691         // deal with them
 692         while (!fmt.empty()) {
 693                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 694                         // that's the end
 695                         fmt = fmt.substr(2);
 696                         break;
 697                 }
 698                 // check for an embedded option
 699                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 700                         docstring part;
 701                         docstring const rest = parseEmbeddedOption(fmt, part);
 702                         if (fmt == rest) {
 703                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 704                                 return format;
 705                         }
 706                         clause += part;
 707                         fmt = rest;
 708                 } else { // it's just a normal character
 709                                 clause += fmt[0];
 710                                 fmt = fmt.substr(1);
 711                 }
 712         }
 713         return fmt;
 714 }
 715
 716
 717 // parse an options string, which must appear at the start of the
 718 // format parameter. puts the parsed bits in optkey, ifpart, and
 719 // elsepart and returns what's left after the option is removed.
 720 // if there's an error, it returns format itself.
 721 docstring parseOptions(docstring const & format, string & optkey,
 722                     docstring & ifpart, docstring & elsepart)
 723 {
 724         LASSERT(format[0] == '{' && format[1] == '%', return format);
 725         // strip '{%'
 726         docstring fmt = format.substr(2);
 727         size_t pos = fmt.find('%'); // end of key
 728         if (pos == string::npos) {
 729                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 730                 return format;
 731         }
 732         optkey = to_utf8(fmt.substr(0, pos));
 733         fmt = fmt.substr(pos + 1);
 734         // [[format]] should be next
 735         if (fmt[0] != '[' || fmt[1] != '[') {
 736                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 737                 return format;
 738         }
 739
 740         docstring curfmt = fmt;
 741         fmt = getClause(curfmt, ifpart);
 742         if (fmt == curfmt) {
 743                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 744                 return format;
 745         }
 746
 747         if (fmt[0] == '}') // we're done, no else clause
 748                 return fmt.substr(1);
 749
 750         // else part should follow
 751         if (fmt[0] != '[' || fmt[1] != '[') {
 752                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 753                 return format;
 754         }
 755
 756         curfmt = fmt;
 757         fmt = getClause(curfmt, elsepart);
 758         // we should be done
 759         if (fmt == curfmt || fmt[0] != '}') {
 760                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 761                 return format;
 762         }
 763         return fmt.substr(1);
 764 }
 765
 766
 767 } // namespace
 768
 769 /* FIXME
 770 Bug #9131 revealed an oddity in how we are generating citation information
 771 when more than one key is given. We end up building a longer and longer format
 772 string as we go, which we then have to re-parse, over and over and over again,
 773 rather than generating the information for the individual keys and then putting
 774 all of that together. We do that to deal with the way separators work, from what
 775 I can tell, but it still feels like a hack. Fixing this would require quite a
 776 bit of work, however.
 777 */
// Expands the citation format string format for this entry and returns the
// result. The format is consumed from the front, character by character:
//   %key%      is replaced by a value. Keys starting with '!' are macros
//              from the document class: their definition is put back at the
//              front of the format and parsed in turn (counter is increased
//              by one for each such expansion). Keys starting with "B_" or
//              '_' are looked up as macros as well and the result is passed
//              through translateIfPossible(). All other keys are resolved
//              with getValueForKey() and, outside of rich text, wrapped in
//              {!<span class="bib-KEY">!} ... {!</span>!}.
//   {%key%[[if]][[else]]}
//              is a conditional, see parseOptions(). The if part is used
//              if the key has a non-empty value, the else part (if any)
//              otherwise. The keys "next" and "second" are decided by the
//              parameters of the same name.
//   {! ... !}  delimits rich text; the delimiters are kept in the output.
// Everything else is copied to the output.
// xrefs, buf and ci are passed on to getValueForKey().
// On a syntax error, or if counter exceeds the internal limit, an error is
// logged and the translated string "ERROR!" is returned.
docstring BibTeXInfo::expandFormat(docstring const & format,
                BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
                CiteItem const & ci, bool next, bool second) const
{
        // incorrect use of macros could put us in an infinite loop
        static int const max_passes = 5000;
        // the use of overly large keys can lead to performance problems, due
        // to eventual attempts to convert LaTeX macros to unicode. See bug
        // #8944. By default, the size is limited to 128 (in CiteItem), but
        // for specific purposes (such as XHTML export), it needs to be enlarged
        // This is perhaps not the best solution, but it will have to do for now.
        size_t const max_keysize = ci.max_key_size;
        odocstringstream ret; // return value
        // name of the key currently being read (between two '%')
        string key;
        // true while we are between the opening and the closing '%' of a key
        bool scanning_key = false;
        // true while we are between '{!' and '!}'
        bool scanning_rich = false;

        CiteEngineType const engine_type = buf.params().citeEngineType();
        docstring fmt = format;
        // we'll remove characters from the front of fmt as we
        // deal with them
        while (!fmt.empty()) {
                // counter only grows when a macro is expanded, see below
                if (counter > max_passes) {
                        LYXERR0("Recursion limit reached while parsing `"
                                << format << "'.");
                        return _("ERROR!");
                }

                char_type thischar = fmt[0];
                if (thischar == '%') {
                        // beginning or end of key
                        if (scanning_key) {
                                // end of key
                                scanning_key = false;
                                // so we replace the key with its value, which may be empty
                                if (key[0] == '!') {
                                        // macro
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        // the macro definition takes the place of the
                                        // closing '%' and is parsed like the rest of
                                        // the format in the following passes
                                        fmt = from_utf8(val) + fmt.substr(1);
                                        counter += 1;
                                        continue;
                                } else if (prefixIs(key, "B_")) {
                                        // a translatable bit (to the Buffer language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val), buf.params().language->code());
                                        ret << trans;
                                } else if (key[0] == '_') {
                                        // a translatable bit (to the GUI language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val));
                                        ret << trans;
                                } else {
                                        // a regular key: output its value, marked up
                                        // with the key name unless we are already
                                        // inside rich text
                                        docstring const val =
                                                getValueForKey(key, buf, ci, xrefs, max_keysize);
                                        if (!scanning_rich)
                                                ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
                                        ret << val;
                                        if (!scanning_rich)
                                                ret << from_ascii("{!</span>!}");
                                }
                        } else {
                                // beginning of key
                                key.clear();
                                scanning_key = true;
                        }
                }
                else if (thischar == '{') {
                        // beginning of option?
                        if (scanning_key) {
                                LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
                                return _("ERROR!");
                        }
                        if (fmt.size() > 1) {
                                if (fmt[1] == '%') {
                                        // it is the beginning of an optional format
                                        string optkey;
                                        docstring ifpart;
                                        docstring elsepart;
                                        // parseOptions() returns its input on error
                                        docstring const newfmt =
                                                parseOptions(fmt, optkey, ifpart, elsepart);
                                        if (newfmt == fmt) // parse error
                                                return _("ERROR!");
                                        fmt = newfmt;
                                        docstring const val =
                                                getValueForKey(optkey, buf, ci, xrefs);
                                        // NOTE(review): the nested calls below start with
                                        // a fresh counter and do not pass second on, so
                                        // the pass limit above applies per call and the
                                        // nested call sees whatever default the
                                        // declaration gives second -- confirm that both
                                        // are intended.
                                        if (optkey == "next" && next)
                                                ret << ifpart; // without expansion
                                        else if (optkey == "second" && second) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!val.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!elsepart.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(elsepart, xrefs, newcounter, buf,
                                                        ci, next);
                                        }
                                        // fmt will have been shortened for us already
                                        continue;
                                }
                                if (fmt[1] == '!') {
                                        // beginning of rich text
                                        scanning_rich = true;
                                        fmt = fmt.substr(2);
                                        ret << from_ascii("{!");
                                        continue;
                                }
                        }
                        // we are here if '{' was not followed by % or !.
                        // So it's just a character.
                        ret << thischar;
                }
                else if (scanning_rich && thischar == '!'
                         && fmt.size() > 1 && fmt[1] == '}') {
                        // end of rich text
                        scanning_rich = false;
                        fmt = fmt.substr(2);
                        ret << from_ascii("!}");
                        continue;
                }
                else if (scanning_key)
                        // part of the key name
                        key += char(thischar);
                else {
                        // an ordinary character; if the stream cannot encode
                        // it, it is reported and left out
                        try {
                                ret.put(thischar);
                        } catch (EncodingException & /* e */) {
                                LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
                        }
                }
                // done with this character
                fmt = fmt.substr(1);
        } // while loop
        if (scanning_key) {
                LYXERR0("Never found end of key in `" << format << "'!");
                return _("ERROR!");
        }
        if (scanning_rich) {
                LYXERR0("Never found end of rich text in `" << format << "'!");
                return _("ERROR!");
        }
        return ret.str();
}
 927
 928
 929 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
 930         Buffer const & buf, CiteItem const & ci) const
 931 {
 932         bool const richtext = ci.richtext;
 933
 934         if (!richtext && !info_.empty())
 935                 return info_;
 936         if (richtext && !info_richtext_.empty())
 937                 return info_richtext_;
 938
 939         if (!is_bibtex_) {
 940                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 941                 info_ = it->second;
 942                 return info_;
 943         }
 944
 945         CiteEngineType const engine_type = buf.params().citeEngineType();
 946         DocumentClass const & dc = buf.params().documentClass();
 947         docstring const & format =
 948                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
 949         int counter = 0;
 950         info_ = expandFormat(format, xrefs, counter, buf,
 951                 ci, false, false);
 952
 953         if (info_.empty()) {
 954                 // this probably shouldn't happen
 955                 return info_;
 956         }
 957
 958         if (richtext) {
 959                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
 960                 return info_richtext_;
 961         }
 962
 963         info_ = convertLaTeXCommands(processRichtext(info_, false));
 964         return info_;
 965 }
 966
 967
 968 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
 969         Buffer const & buf, docstring const & format,
 970         CiteItem const & ci, bool next, bool second) const
 971 {
 972         docstring loclabel;
 973
 974         int counter = 0;
 975         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 976
 977         if (!loclabel.empty() && !next) {
 978                 loclabel = processRichtext(loclabel, ci.richtext);
 979                 loclabel = convertLaTeXCommands(loclabel);
 980         }
 981
 982         return loclabel;
 983 }
 984
 985
 986 docstring const & BibTeXInfo::operator[](docstring const & field) const
 987 {
 988         BibTeXInfo::const_iterator it = find(field);
 989         if (it != end())
 990                 return it->second;
 991         static docstring const empty_value = docstring();
 992         return empty_value;
 993 }
 994
 995
 996 docstring const & BibTeXInfo::operator[](string const & field) const
 997 {
 998         return operator[](from_ascii(field));
 999 }
1000
1001
1002 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1003         CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1004 {
1005         // anything less is pointless
1006         LASSERT(maxsize >= 16, maxsize = 16);
1007         string key = oldkey;
1008         bool cleanit = false;
1009         if (prefixIs(oldkey, "clean:")) {
1010                 key = oldkey.substr(6);
1011                 cleanit = true;
1012         }
1013
1014         docstring ret = operator[](key);
1015         if (ret.empty() && !xrefs.empty()) {
1016                 // xr is a (reference to a) BibTeXInfo const *
1017                 for (auto const & xr : xrefs) {
1018                         if (xr && !(*xr)[key].empty()) {
1019                                 ret = (*xr)[key];
1020                                 break;
1021                         }
1022                 }
1023         }
1024         if (ret.empty()) {
1025                 // some special keys
1026                 // FIXME: dialog, textbefore and textafter have nothing to do with this
1027                 if (key == "dialog" && ci.context == CiteItem::Dialog)
1028                         ret = from_ascii("x"); // any non-empty string will do
1029                 else if (key == "export" && ci.context == CiteItem::Export)
1030                         ret = from_ascii("x"); // any non-empty string will do
1031                 else if (key == "ifstar" && ci.Starred)
1032                         ret = from_ascii("x"); // any non-empty string will do
1033                 else if (key == "ifqualified" && ci.isQualified)
1034                         ret = from_ascii("x"); // any non-empty string will do
1035                 else if (key == "entrytype")
1036                         ret = entry_type_;
1037                 else if (prefixIs(key, "ifentrytype:")
1038                          && from_ascii(key.substr(12)) == entry_type_)
1039                         ret = from_ascii("x"); // any non-empty string will do
1040                 else if (key == "key")
1041                         ret = bib_key_;
1042                 else if (key == "label")
1043                         ret = label_;
1044                 else if (key == "modifier" && modifier_ != 0)
1045                         ret = modifier_;
1046                 else if (key == "numericallabel")
1047                         ret = cite_number_;
1048                 else if (prefixIs(key, "ifmultiple:")) {
1049                         // Return whether we have multiple authors
1050                         docstring const kind = operator[](from_ascii(key.substr(11)));
1051                         if (multipleAuthors(kind))
1052                                 ret = from_ascii("x"); // any non-empty string will do
1053                 }
1054                 else if (prefixIs(key, "abbrvnames:")) {
1055                         // Special key to provide abbreviated name list,
1056                         // with respect to maxcitenames. Suitable for Bibliography
1057                         // beginnings.
1058                         docstring const kind = operator[](from_ascii(key.substr(11)));
1059                         ret = getAuthorList(&buf, kind, false, false, true);
1060                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1061                                 ret[0] = uppercase(ret[0]);
1062                 } else if (prefixIs(key, "fullnames:")) {
1063                         // Return a full name list. Suitable for Bibliography
1064                         // beginnings.
1065                         docstring const kind = operator[](from_ascii(key.substr(10)));
1066                         ret = getAuthorList(&buf, kind, true, false, true);
1067                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1068                                 ret[0] = uppercase(ret[0]);
1069                 } else if (prefixIs(key, "forceabbrvnames:")) {
1070                         // Special key to provide abbreviated name lists,
1071                         // irrespective of maxcitenames. Suitable for Bibliography
1072                         // beginnings.
1073                         docstring const kind = operator[](from_ascii(key.substr(15)));
1074                         ret = getAuthorList(&buf, kind, false, true, true);
1075                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1076                                 ret[0] = uppercase(ret[0]);
1077                 } else if (prefixIs(key, "abbrvbynames:")) {
1078                         // Special key to provide abbreviated name list,
1079                         // with respect to maxcitenames. Suitable for further names inside a
1080                         // bibliography item // (such as "ed. by ...")
1081                         docstring const kind = operator[](from_ascii(key.substr(11)));
1082                         ret = getAuthorList(&buf, kind, false, false, true, false);
1083                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1084                                 ret[0] = uppercase(ret[0]);
1085                 } else if (prefixIs(key, "fullbynames:")) {
1086                         // Return a full name list. Suitable for further names inside a
1087                         // bibliography item // (such as "ed. by ...")
1088                         docstring const kind = operator[](from_ascii(key.substr(10)));
1089                         ret = getAuthorList(&buf, kind, true, false, true, false);
1090                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1091                                 ret[0] = uppercase(ret[0]);
1092                 } else if (prefixIs(key, "forceabbrvbynames:")) {
1093                         // Special key to provide abbreviated name lists,
1094                         // irrespective of maxcitenames. Suitable for further names inside a
1095                         // bibliography item // (such as "ed. by ...")
1096                         docstring const kind = operator[](from_ascii(key.substr(15)));
1097                         ret = getAuthorList(&buf, kind, false, true, true, false);
1098                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1099                                 ret[0] = uppercase(ret[0]);
1100                 } else if (key == "abbrvciteauthor") {
1101                         // Special key to provide abbreviated author or
1102                         // editor names (suitable for citation labels),
1103                         // with respect to maxcitenames.
1104                         ret = getAuthorOrEditorList(&buf, false, false);
1105                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1106                                 ret[0] = uppercase(ret[0]);
1107                 } else if (key == "fullciteauthor") {
1108                         // Return a full author or editor list (for citation labels)
1109                         ret = getAuthorOrEditorList(&buf, true, false);
1110                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1111                                 ret[0] = uppercase(ret[0]);
1112                 } else if (key == "forceabbrvciteauthor") {
1113                         // Special key to provide abbreviated author or
1114                         // editor names (suitable for citation labels),
1115                         // irrespective of maxcitenames.
1116                         ret = getAuthorOrEditorList(&buf, false, true);
1117                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1118                                 ret[0] = uppercase(ret[0]);
1119                 } else if (key == "bibentry") {
1120                         // Special key to provide the full bibliography entry: see getInfo()
1121                         CiteEngineType const engine_type = buf.params().citeEngineType();
1122                         DocumentClass const & dc = buf.params().documentClass();
1123                         docstring const & format =
1124                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1125                         int counter = 0;
1126                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1127                 } else if (key == "textbefore")
1128                         ret = ci.textBefore;
1129                 else if (key == "textafter")
1130                         ret = ci.textAfter;
1131                 else if (key == "curpretext") {
1132                         vector<pair<docstring, docstring>> pres = ci.getPretexts();
1133                         vector<pair<docstring, docstring>>::iterator it = pres.begin();
1134                         int numkey = 1;
1135                         for (; it != pres.end() ; ++it) {
1136                                 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1137                                         ret = (*it).second;
1138                                         pres.erase(it);
1139                                         break;
1140                                 }
1141                                 if ((*it).first == bib_key_)
1142                                         ++numkey;
1143                         }
1144                 } else if (key == "curposttext") {
1145                         vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1146                         vector<pair<docstring, docstring>>::iterator it = posts.begin();
1147                         int numkey = 1;
1148                         for (; it != posts.end() ; ++it) {
1149                                 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1150                                         ret = (*it).second;
1151                                         posts.erase(it);
1152                                         break;
1153                                 }
1154                                 if ((*it).first == bib_key_)
1155                                         ++numkey;
1156                         }
1157                 } else if (key == "year")
1158                         ret = getYear();
1159         }
1160
1161         if (cleanit)
1162                 ret = xml::cleanAttr(ret);
1163
1164         // make sure it is not too big
1165         support::truncateWithEllipsis(ret, maxsize);
1166         return ret;
1167 }
1168
1169
1170 //////////////////////////////////////////////////////////////////////
1171 //
1172 // BiblioInfo
1173 //
1174 //////////////////////////////////////////////////////////////////////
1175
1176 namespace {
1177
1178 // A functor for use with sort, leading to case insensitive sorting
1179 bool compareNoCase(const docstring & a, const docstring & b) {
1180         return compare_no_case(a, b) < 0;
1181 }
1182
1183 } // namespace
1184
1185
1186 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1187 {
1188         vector<docstring> result;
1189         if (!data.isBibTeX())
1190                 return result;
1191         // Legacy crossref field. This is not nestable.
1192         if (!nested && !data["crossref"].empty()) {
1193                 docstring const xrefkey = data["crossref"];
1194                 result.push_back(xrefkey);
1195                 // However, check for nested xdatas
1196                 BiblioInfo::const_iterator it = find(xrefkey);
1197                 if (it != end()) {
1198                         BibTeXInfo const & xref = it->second;
1199                         vector<docstring> const nxdata = getXRefs(xref, true);
1200                         if (!nxdata.empty())
1201                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
1202                 }
1203         }
1204         // Biblatex's xdata field. Infinitely nestable.
1205         // XData field can consist of a comma-separated list of keys
1206         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1207         if (!xdatakeys.empty()) {
1208                 for (auto const & xdatakey : xdatakeys) {
1209                         result.push_back(xdatakey);
1210                         BiblioInfo::const_iterator it = find(xdatakey);
1211                         if (it != end()) {
1212                                 BibTeXInfo const & xdata = it->second;
1213                                 vector<docstring> const nxdata = getXRefs(xdata, true);
1214                                 if (!nxdata.empty())
1215                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
1216                         }
1217                 }
1218         }
1219         return result;
1220 }
1221
1222
1223 vector<docstring> const BiblioInfo::getKeys() const
1224 {
1225         vector<docstring> bibkeys;
1226         for (auto const & bi : *this)
1227                 bibkeys.push_back(bi.first);
1228         sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1229         return bibkeys;
1230 }
1231
1232
1233 vector<docstring> const BiblioInfo::getFields() const
1234 {
1235         vector<docstring> bibfields;
1236         for (auto const & fn : field_names_)
1237                 bibfields.push_back(fn);
1238         sort(bibfields.begin(), bibfields.end());
1239         return bibfields;
1240 }
1241
1242
1243 vector<docstring> const BiblioInfo::getEntries() const
1244 {
1245         vector<docstring> bibentries;
1246         for (auto const & et : entry_types_)
1247                 bibentries.push_back(et);
1248         sort(bibentries.begin(), bibentries.end());
1249         return bibentries;
1250 }
1251
1252
1253 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1254 {
1255         BiblioInfo::const_iterator it = find(key);
1256         if (it == end())
1257                 return docstring();
1258         BibTeXInfo const & data = it->second;
1259         return data.getAuthorOrEditorList(&buf, false);
1260 }
1261
1262
1263 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1264 {
1265         BiblioInfo::const_iterator it = find(key);
1266         if (it == end())
1267                 return docstring();
1268         BibTeXInfo const & data = it->second;
1269         return data.citeNumber();
1270 }
1271
1272
1273 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1274 {
1275         BiblioInfo::const_iterator it = find(key);
1276         if (it == end())
1277                 return docstring();
1278         BibTeXInfo const & data = it->second;
1279         docstring year = data.getYear();
1280         if (year.empty()) {
1281                 // let's try the crossrefs
1282                 vector<docstring> const xrefs = getXRefs(data);
1283                 if (xrefs.empty())
1284                         // no luck
1285                         return docstring();
1286                 for (docstring const & xref : xrefs) {
1287                         BiblioInfo::const_iterator const xrefit = find(xref);
1288                         if (xrefit == end())
1289                                 continue;
1290                         BibTeXInfo const & xref_data = xrefit->second;
1291                         year = xref_data.getYear();
1292                         if (!year.empty())
1293                                 // success!
1294                                 break;
1295                 }
1296         }
1297         if (use_modifier && data.modifier() != 0)
1298                 year += data.modifier();
1299         return year;
1300 }
1301
1302
1303 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1304 {
1305         docstring const year = getYear(key, use_modifier);
1306         if (year.empty())
1307                 return buf.B_("No year");
1308         return year;
1309 }
1310
1311
1312 docstring const BiblioInfo::getInfo(docstring const & key,
1313         Buffer const & buf, CiteItem const & ci) const
1314 {
1315         BiblioInfo::const_iterator it = find(key);
1316         if (it == end())
1317                 return docstring(_("Bibliography entry not found!"));
1318         BibTeXInfo const & data = it->second;
1319         BibTeXInfoList xrefptrs;
1320         for (docstring const & xref : getXRefs(data)) {
1321                 BiblioInfo::const_iterator const xrefit = find(xref);
1322                 if (xrefit != end())
1323                         xrefptrs.push_back(&(xrefit->second));
1324         }
1325         return data.getInfo(xrefptrs, buf, ci);
1326 }
1327
1328
1329 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1330         Buffer const & buf, string const & style, CiteItem const & ci) const
1331 {
1332         size_t max_size = ci.max_size;
1333         // shorter makes no sense
1334         LASSERT(max_size >= 16, max_size = 16);
1335
1336         // we can't display more than 10 of these, anyway
1337         // but since we truncate in the middle,
1338         // we need to split into two halfs.
1339         bool const too_many_keys = keys.size() > 10;
1340         vector<docstring> lkeys;
1341         if (too_many_keys) {
1342                 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
1343                 keys.resize(5);
1344                 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1345         }
1346
1347         CiteEngineType const engine_type = buf.params().citeEngineType();
1348         DocumentClass const & dc = buf.params().documentClass();
1349         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1350         docstring ret = format;
1351         vector<docstring>::const_iterator key = keys.begin();
1352         vector<docstring>::const_iterator ken = keys.end();
1353         vector<docstring> handled_keys;
1354         for (int i = 0; key != ken; ++key, ++i) {
1355                 handled_keys.push_back(*key);
1356                 int n = 0;
1357                 for (auto const & k : handled_keys) {
1358                         if (k == *key)
1359                                 ++n;
1360                 }
1361                 BiblioInfo::const_iterator it = find(*key);
1362                 BibTeXInfo empty_data;
1363                 empty_data.key(*key);
1364                 BibTeXInfo & data = empty_data;
1365                 vector<BibTeXInfo const *> xrefptrs;
1366                 if (it != end()) {
1367                         data = it->second;
1368                         for (docstring const & xref : getXRefs(data)) {
1369                                 BiblioInfo::const_iterator const xrefit = find(xref);
1370                                 if (xrefit != end())
1371                                         xrefptrs.push_back(&(xrefit->second));
1372                         }
1373                 }
1374                 data.numKey(n);
1375                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1376         }
1377
1378         support::truncateWithEllipsis(ret, max_size, true);
1379
1380         return ret;
1381 }
1382
1383
1384 bool BiblioInfo::isBibtex(docstring const & key) const
1385 {
1386         docstring key1;
1387         split(key, key1, ',');
1388         BiblioInfo::const_iterator it = find(key1);
1389         if (it == end())
1390                 return false;
1391         return it->second.isBibTeX();
1392 }
1393
1394
1395 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1396         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1397         Buffer const & buf, CiteItem const & ci) const
1398 {
1399         if (empty())
1400                 return vector<pair<docstring,docstring>>();
1401
1402         string style;
1403         CiteStringMap csm(styles.size());
1404         for (size_t i = 0; i != csm.size(); ++i) {
1405                 style = styles[i].name;
1406                 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1407         }
1408
1409         return csm;
1410 }
1411
1412
1413 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1414 {
1415         bimap_.insert(info.begin(), info.end());
1416         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1417         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1418 }
1419
1420
1421 namespace {
1422
1423 // used in xhtml to sort a list of BibTeXInfo objects
1424 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1425 {
1426         docstring const lauth = lhs->getAuthorOrEditorList();
1427         docstring const rauth = rhs->getAuthorOrEditorList();
1428         docstring const lyear = lhs->getYear();
1429         docstring const ryear = rhs->getYear();
1430         docstring const ltitl = lhs->operator[]("title");
1431         docstring const rtitl = rhs->operator[]("title");
1432         return  (lauth < rauth)
1433                 || (lauth == rauth && lyear < ryear)
1434                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1435 }
1436
1437 } // namespace
1438
1439
1440 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1441 {
1442         cited_entries_.clear();
1443         // We are going to collect all the citation keys used in the document,
1444         // getting them from the TOC.
1445         // FIXME We may want to collect these differently, in the first case,
1446         // so that we might have them in order of appearance.
1447         set<docstring> citekeys;
1448         Toc const & toc = *buf.tocBackend().toc("citation");
1449         for (auto const & t : toc) {
1450                 if (t.str().empty())
1451                         continue;
1452                 vector<docstring> const keys = getVectorFromString(t.str());
1453                 citekeys.insert(keys.begin(), keys.end());
1454         }
1455         if (citekeys.empty())
1456                 return;
1457
1458         // We have a set of the keys used in this document.
1459         // We will now convert it to a list of the BibTeXInfo objects used in
1460         // this document...
1461         vector<BibTeXInfo const *> bi;
1462         for (auto const & ck : citekeys) {
1463                 BiblioInfo::const_iterator const bt = find(ck);
1464                 if (bt == end() || !bt->second.isBibTeX())
1465                         continue;
1466                 bi.push_back(&(bt->second));
1467         }
1468         // ...and sort it.
1469         sort(bi.begin(), bi.end(), lSorter);
1470
1471         // Now we can write the sorted keys
1472         // b is a BibTeXInfo const *
1473         for (auto const & b : bi)
1474                 cited_entries_.push_back(b->key());
1475 }
1476
1477
1478 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1479 {
1480         collectCitedEntries(buf);
1481         CiteEngineType const engine_type = buf.params().citeEngineType();
1482         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1483
1484         int keynumber = 0;
1485         char modifier = 0;
1486         // used to remember the last one we saw
1487         // we'll be comparing entries to see if we need to add
1488         // modifiers, like "1984a"
1489         map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1490
1491         // add letters to years
1492         for (auto const & ce : cited_entries_) {
1493                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1494                 // this shouldn't happen, but...
1495                 if (biit == bimap_.end())
1496                         // ...fail gracefully, anyway.
1497                         continue;
1498                 BibTeXInfo & entry = biit->second;
1499                 if (numbers) {
1500                         docstring const num = convert<docstring>(++keynumber);
1501                         entry.setCiteNumber(num);
1502                 } else {
1503                         // The first test here is checking whether this is the first
1504                         // time through the loop. If so, then we do not have anything
1505                         // with which to compare.
1506                         if (last != bimap_.end()
1507                             && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1508                             // we access the year via getYear() so as to get it from the xref,
1509                             // if we need to do so
1510                             && getYear(entry.key()) == getYear(last->second.key())) {
1511                                 if (modifier == 0) {
1512                                         // so the last one should have been 'a'
1513                                         last->second.setModifier('a');
1514                                         modifier = 'b';
1515                                 } else if (modifier == 'z')
1516                                         modifier = 'A';
1517                                 else
1518                                         modifier++;
1519                         } else {
1520                                 modifier = 0;
1521                         }
1522                         entry.setModifier(modifier);
1523                         // remember the last one
1524                         last = biit;
1525                 }
1526         }
1527         // Set the labels
1528         for (auto const & ce : cited_entries_) {
1529                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1530                 // this shouldn't happen, but...
1531                 if (biit == bimap_.end())
1532                         // ...fail gracefully, anyway.
1533                         continue;
1534                 BibTeXInfo & entry = biit->second;
1535                 if (numbers) {
1536                         entry.label(entry.citeNumber());
1537                 } else {
1538                         docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1539                         // we do it this way so as to access the xref, if necessary
1540                         // note that this also gives us the modifier
1541                         docstring const year = getYear(ce, buf, true);
1542                         if (!auth.empty() && !year.empty())
1543                                 entry.label(auth + ' ' + year);
1544                         else
1545                                 entry.label(entry.key());
1546                 }
1547         }
1548 }
1549
1550
1551 //////////////////////////////////////////////////////////////////////
1552 //
1553 // CitationStyle
1554 //
1555 //////////////////////////////////////////////////////////////////////
1556
1557
1558 CitationStyle citationStyleFromString(string const & command,
1559                                       BufferParams const & params)
1560 {
1561         CitationStyle cs;
1562         if (command.empty())
1563                 return cs;
1564
1565         string const alias = params.getCiteAlias(command);
1566         string cmd = alias.empty() ? command : alias;
1567         if (isUpperCase(command[0])) {
1568                 cs.forceUpperCase = true;
1569                 cmd[0] = lowercase(cmd[0]);
1570         }
1571
1572         size_t const n = command.size() - 1;
1573         if (command[n] == '*') {
1574                 cs.hasStarredVersion = true;
1575                 if (suffixIs(cmd, '*'))
1576                         cmd = cmd.substr(0, cmd.size() - 1);
1577         }
1578
1579         cs.name = cmd;
1580         return cs;
1581 }
1582
1583
1584 string citationStyleToString(const CitationStyle & cs, bool const latex)
1585 {
1586         string cmd = latex ? cs.cmd : cs.name;
1587         if (cs.forceUpperCase)
1588                 cmd[0] = uppercase(cmd[0]);
1589         if (cs.hasStarredVersion)
1590                 cmd += '*';
1591         return cmd;
1592 }
1593
1594
1595 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1596 {
1597         // This function closely mimics getAuthorList, but produces DocBook instead of text.
1598         // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1599         // as the output has a database-like shape.
1600         // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1601
1602         if (authorsString.empty()) {
1603                 return docstring();
1604         }
1605
1606         // Split the input list of authors into individual authors.
1607         vector<docstring> const authors = getAuthors(authorsString);
1608
1609         // Retrieve the "et al." variation.
1610         string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1611
1612         // Output the list of authors.
1613         xs << xml::StartTag("authorgroup");
1614         auto it = authors.cbegin();
1615         auto en = authors.cend();
1616         for (size_t i = 0; it != en; ++it, ++i) {
1617                 xs << xml::StartTag("author");
1618                 xs << xml::CR();
1619                 xs << xml::StartTag("personname");
1620                 xs << xml::CR();
1621                 docstring name = *it;
1622
1623                 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1624                 if (name == "others") {
1625                         xs << buf.B_(etal);
1626                 } else {
1627                         name_parts parts = nameParts(name);
1628                         if (! parts.prefix.empty()) {
1629                                 xs << xml::StartTag("honorific");
1630                                 xs << parts.prefix;
1631                                 xs << xml::EndTag("honorific");
1632                                 xs << xml::CR();
1633                         }
1634                         if (! parts.prename.empty()) {
1635                                 xs << xml::StartTag("firstname");
1636                                 xs << parts.prename;
1637                                 xs << xml::EndTag("firstname");
1638                                 xs << xml::CR();
1639                         }
1640                         if (! parts.surname.empty()) {
1641                                 xs << xml::StartTag("surname");
1642                                 xs << parts.surname;
1643                                 xs << xml::EndTag("surname");
1644                                 xs << xml::CR();
1645                         }
1646                         if (! parts.suffix.empty()) {
1647                                 xs << xml::StartTag("othername", "role=\"suffix\"");
1648                                 xs << parts.suffix;
1649                                 xs << xml::EndTag("othername");
1650                                 xs << xml::CR();
1651                         }
1652                 }
1653
1654                 xs << xml::EndTag("personname");
1655                 xs << xml::CR();
1656                 xs << xml::EndTag("author");
1657                 xs << xml::CR();
1658
1659                 // Could add an affiliation after <personname>, but not stored in BibTeX.
1660         }
1661         xs << xml::EndTag("authorgroup");
1662         xs << xml::CR();
1663
1664         return docstring();
1665 }
1666
1667 } // namespace lyx