src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Kimberly Heck
   9  * \author Julien Rioux
  10  * \author Jürgen Spitzmüller
  11  *
  12  * Full author contact details are available in file CREDITS.
  13  */
  14
  15 #include <config.h>
  16
  17 #include "BiblioInfo.h"
  18
  19 #include "Buffer.h"
  20 #include "BufferParams.h"
  21 #include "Citation.h"
  22 #include "Encoding.h"
  23 #include "Language.h"
  24 #include "TextClass.h"
  25 #include "TocBackend.h"
  26 #include "xml.h"
  27
  28 #include "support/convert.h"
  29 #include "support/debug.h"
  30 #include "support/docstream.h"
  31 #include "support/FileName.h"
  32 #include "support/gettext.h"
  33 #include "support/lassert.h"
  34 #include "support/lstrings.h"
  35 #include "support/textutils.h"
  36
  37 #include <map>
  38 #include <regex>
  39 #include <set>
  40
  41 using namespace std;
  42 using namespace lyx::support;
  43
  44
  45 namespace lyx {
  46
  47 namespace {
  48
  49 // Remove placeholders from names
  50 docstring renormalize(docstring const & input)
  51 {
  52         docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
  53         return subst(res, from_ascii("$$comma!"), from_ascii(","));
  54 }
  55
  56
  57 // Split the surname into prefix ("von-part") and family name
  58 pair<docstring, docstring> parseSurname(docstring const & sname)
  59 {
  60         // Split the surname into its tokens
  61         vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
  62         if (pieces.size() < 2)
  63                 return make_pair(docstring(), sname);
  64
  65         // Now we look for pieces that begin with a lower case letter.
  66         // All except for the very last token constitute the "von-part".
  67         docstring prefix;
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator const en = pieces.end();
  70         bool first = true;
  71         for (; it != en; ++it) {
  72                 if ((*it).empty())
  73                         continue;
  74                 // If this is the last piece, then what we now have is
  75                 // the family name, notwithstanding the casing.
  76                 if (it + 1 == en)
  77                         break;
  78                 char_type const c = (*it)[0];
  79                 // If the piece starts with a upper case char, we assume
  80                 // this is part of the surname.
  81                 if (!isLower(c))
  82                         break;
  83                 // Nothing of the former, so add this piece to the prename
  84                 if (!first)
  85                         prefix += " ";
  86                 else
  87                         first = false;
  88                 prefix += *it;
  89         }
  90
  91         // Reconstruct the family name.
  92         // Note that if we left the loop with because it + 1 == en,
  93         // then this will still do the right thing, i.e., make surname
  94         // just be the last piece.
  95         docstring surname;
  96         first = true;
  97         for (; it != en; ++it) {
  98                 if (!first)
  99                         surname += " ";
 100                 else
 101                         first = false;
 102                 surname += *it;
 103         }
 104         return make_pair(prefix, surname);
 105 }
 106
 107
 108 struct name_parts {
 109         docstring surname;
 110         docstring prename;
 111         docstring suffix;
 112         docstring prefix;
 113 };
 114
 115
 116 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
 117 name_parts nameParts(docstring const & iname)
 118 {
 119         name_parts res;
 120         if (iname.empty())
 121                 return res;
 122
 123         // First we check for goupings (via {...}) and replace blanks and
 124         // commas inside groups with temporary placeholders
 125         docstring name;
 126         int gl = 0;
 127         docstring::const_iterator p = iname.begin();
 128         while (p != iname.end()) {
 129                 // count grouping level
 130                 if (*p == '{')
 131                         ++gl;
 132                 else if (*p == '}')
 133                         --gl;
 134                 // generate string with probable placeholders
 135                 if (*p == ' ' && gl > 0)
 136                         name += from_ascii("$$space!");
 137                 else if (*p == ',' && gl > 0)
 138                         name += from_ascii("$$comma!");
 139                 else
 140                         name += *p;
 141                 ++p;
 142         }
 143
 144         // Now we look for a comma, and take the last name to be everything
 145         // preceding the right-most one, so that we also get the name suffix
 146         // (aka "jr" part).
 147         vector<docstring> pieces = getVectorFromString(name);
 148         if (pieces.size() > 1) {
 149                 // Whether we have a name suffix or not, the prename is
 150                 // always last item
 151                 res.prename = renormalize(pieces.back());
 152                 // The family name, conversely, is always the first item.
 153                 // However, it might contain a prefix (aka "von" part)
 154                 docstring const sname = pieces.front();
 155                 res.prefix = renormalize(parseSurname(sname).first);
 156                 res.surname = renormalize(parseSurname(sname).second);
 157                 // If we have three pieces (the maximum allowed by BibTeX),
 158                 // the second one is the name suffix.
 159                 if (pieces.size() > 2)
 160                         res.suffix = renormalize(pieces.at(1));
 161                 return res;
 162         }
 163
 164         // OK, so now we want to look for the last name.
 165         // Split on spaces, to get various tokens.
 166         pieces = getVectorFromString(name, from_ascii(" "));
 167         // No space: Only a family name given
 168         if (pieces.size() < 2) {
 169                 res.surname = renormalize(pieces.back());
 170                 return res;
 171         }
 172         // If we get two pieces, assume "prename surname"
 173         if (pieces.size() == 2) {
 174                 res.prename = renormalize(pieces.front());
 175                 res.surname = renormalize(pieces.back());
 176                 return res;
 177         }
 178
 179         // More than 3 pieces: A name prefix (aka "von" part) might be included.
 180         // We look for the first piece that begins with a lower case letter
 181         // (which is the name prefix, if it is not the last token) or the last token.
 182         docstring prename;
 183         vector<docstring>::const_iterator it = pieces.begin();
 184         vector<docstring>::const_iterator const en = pieces.end();
 185         bool first = true;
 186         for (; it != en; ++it) {
 187                 if ((*it).empty())
 188                         continue;
 189                 char_type const c = (*it)[0];
 190                 // If the piece starts with a lower case char, we assume
 191                 // this is the name prefix and thus prename is complete.
 192                 if (isLower(c))
 193                         break;
 194                 // Same if this is the last piece, which is always the surname.
 195                 if (it + 1 == en)
 196                         break;
 197                 // Nothing of the former, so add this piece to the prename
 198                 if (!first)
 199                         prename += " ";
 200                 else
 201                         first = false;
 202                 prename += *it;
 203         }
 204
 205         // Now reconstruct the family name and strip the prefix.
 206         // Note that if we left the loop because it + 1 == en,
 207         // then this will still do the right thing, i.e., make surname
 208         // just be the last piece.
 209         docstring surname;
 210         first = true;
 211         for (; it != en; ++it) {
 212                 if (!first)
 213                         surname += " ";
 214                 else
 215                         first = false;
 216                 surname += *it;
 217         }
 218         res.prename = renormalize(prename);
 219         res.prefix = renormalize(parseSurname(surname).first);
 220         res.surname = renormalize(parseSurname(surname).second);
 221         return res;
 222 }
 223
 224
 225 docstring constructName(docstring const & name, string const & scheme)
 226 {
 227         // re-constructs a name from name parts according
 228         // to a given scheme
 229         docstring const prename = nameParts(name).prename;
 230         docstring const surname = nameParts(name).surname;
 231         docstring const prefix = nameParts(name).prefix;
 232         docstring const suffix = nameParts(name).suffix;
 233         string res = scheme;
 234         static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 235         static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 236         static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
 237         smatch sub;
 238         // Changing the first parameter of regex_match() may corrupt the
 239         // second one. In this case we use the temporary string tmp.
 240         if (regex_match(scheme, sub, reg1)) {
 241                 res = sub.str(1);
 242                 if (!prename.empty())
 243                         res += sub.str(3);
 244                 res += sub.str(5);
 245         }
 246         if (regex_match(res, sub, reg2)) {
 247                 string tmp = sub.str(1);
 248                 if (!suffix.empty())
 249                         tmp += sub.str(3);
 250                 res = tmp + sub.str(5);
 251         }
 252         if (regex_match(res, sub, reg3)) {
 253                 string tmp = sub.str(1);
 254                 if (!prefix.empty())
 255                         tmp += sub.str(3);
 256                 res = tmp + sub.str(5);
 257         }
 258         docstring result = from_ascii(res);
 259         result = subst(result, from_ascii("%prename%"), prename);
 260         result = subst(result, from_ascii("%surname%"), surname);
 261         result = subst(result, from_ascii("%prefix%"), prefix);
 262         result = subst(result, from_ascii("%suffix%"), suffix);
 263         return result;
 264 }
 265
 266
 267 vector<docstring> const getAuthors(docstring const & author)
 268 {
 269         // We check for goupings (via {...}) and only consider " and "
 270         // outside groups as author separator. This is to account
 271         // for cases such as {{Barnes and Noble, Inc.}}, which
 272         // need to be treated as one single family name.
 273         // We use temporary placeholders in order to differentiate the
 274         // diverse " and " cases.
 275
 276         // First, we temporarily replace all ampersands. It is rather unusual
 277         // in author names, but can happen (consider cases such as "C \& A Corp.").
 278         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
 279         // Then, we temporarily make all " and " strings to ampersands in order
 280         // to handle them later on a per-char level. Note that arbitrary casing
 281         // ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
 282         static regex const and_reg("(.* )([aA][nN][dD])( .*)");
 283         smatch sub;
 284         string res = to_utf8(iname);
 285         while (regex_match(res, sub, and_reg))
 286                 res = sub.str(1) + "&" + sub.str(3);
 287         iname = from_utf8(res);
 288         // Now we traverse through the string and replace the "&" by the proper
 289         // output in- and outside groups
 290         docstring name;
 291         int gl = 0;
 292         docstring::const_iterator p = iname.begin();
 293         while (p != iname.end()) {
 294                 // count grouping level
 295                 if (*p == '{')
 296                         ++gl;
 297                 else if (*p == '}')
 298                         --gl;
 299                 // generate string with probable placeholders
 300                 if (*p == '&') {
 301                         if (gl > 0)
 302                                 // Inside groups, we output "and"
 303                                 name += from_ascii("and");
 304                         else
 305                                 // Outside groups, we output a separator
 306                                 name += from_ascii("$$namesep!");
 307                 }
 308                 else
 309                         name += *p;
 310                 ++p;
 311         }
 312
 313         // re-insert the literal ampersands
 314         name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
 315
 316         // Now construct the actual vector
 317         return getVectorFromString(name, from_ascii(" $$namesep! "));
 318 }
 319
 320
 321 bool multipleAuthors(docstring const & author)
 322 {
 323         return getAuthors(author).size() > 1;
 324 }
 325
 326
 327 // converts a string containing LaTeX commands into unicode
 328 // for display.
 329 docstring convertLaTeXCommands(docstring const & str)
 330 {
 331         docstring val = str;
 332         docstring ret;
 333
 334         bool scanning_cmd = false;
 335         bool scanning_math = false;
 336         bool is_section = false;
 337         bool escaped = false; // used to catch \$, etc.
 338         while (!val.empty()) {
 339                 char_type const ch = val[0];
 340
 341                 // if we're scanning math, we output everything until we
 342                 // find an unescaped $, at which point we break out.
 343                 if (scanning_math) {
 344                         if (escaped)
 345                                 escaped = false;
 346                         else if (ch == '\\')
 347                                 escaped = true;
 348                         else if (ch == '$')
 349                                 scanning_math = false;
 350                         ret += ch;
 351                         val = val.substr(1);
 352                         continue;
 353                 }
 354
 355                 // if we're scanning a command name, then we just
 356                 // discard characters until we hit something that
 357                 // isn't alpha.
 358                 if (scanning_cmd) {
 359                         if (!is_section && ch == 'S') {
 360                                 is_section = true;
 361                                 val = val.substr(1);
 362                                 continue;
 363                         }
 364                         if (isAlphaASCII(ch)) {
 365                                 is_section = false;
 366                                 val = val.substr(1);
 367                                 escaped = false;
 368                                 continue;
 369                         } else if (is_section) {
 370                                 ret.push_back(0x00a7);
 371                                 is_section = false;
 372                                 continue;
 373                         }
 374                         // so we're done with this command.
 375                         // now we fall through and check this character.
 376                         is_section = false;
 377                         scanning_cmd = false;
 378                 }
 379
 380                 // was the last character a \? If so, then this is something like:
 381                 // \\ or \$, so we'll just output it. That's probably not always right...
 382                 if (escaped) {
 383                         // exception: output \, as THIN SPACE
 384                         if (ch == ',')
 385                                 ret.push_back(0x2009);
 386                         else
 387                                 ret += ch;
 388                         val = val.substr(1);
 389                         escaped = false;
 390                         continue;
 391                 }
 392
 393                 if (ch == '~') {
 394                         ret += char_type(0x00a0);
 395                         val = val.substr(1);
 396                         continue;
 397                 }
 398
 399                 if (ch == '$') {
 400                         ret += ch;
 401                         val = val.substr(1);
 402                         scanning_math = true;
 403                         continue;
 404                 }
 405
 406                 // Change text mode accents in the form
 407                 // {\v a} to \v{a} (see #9340).
 408                 // FIXME: This is a sort of mini-tex2lyx.
 409                 //        Use the real tex2lyx instead!
 410                 static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
 411                 if (regex_search(to_utf8(val), tma_reg)) {
 412                         val = val.substr(1);
 413                         val.replace(2, 1, from_ascii("{"));
 414                         continue;
 415                 }
 416
 417                 // Apart from the above, we just ignore braces
 418                 if (ch == '{' || ch == '}') {
 419                         val = val.substr(1);
 420                         continue;
 421                 }
 422
 423                 // we're going to check things that look like commands, so if
 424                 // this doesn't, just output it.
 425                 if (ch != '\\') {
 426                         ret += ch;
 427                         val = val.substr(1);
 428                         continue;
 429                 }
 430
 431                 // ok, could be a command of some sort
 432                 // let's see if it corresponds to some unicode
 433                 // unicodesymbols has things in the form: \"{u},
 434                 // whereas we may see things like: \"u. So we'll
 435                 // look for that and change it, if necessary.
 436                 // FIXME: This is a sort of mini-tex2lyx.
 437                 //        Use the real tex2lyx instead!
 438                 static regex const reg("^\\\\\\W\\w");
 439                 if (regex_search(to_utf8(val), reg)) {
 440                         val.insert(3, from_ascii("}"));
 441                         val.insert(2, from_ascii("{"));
 442                 }
 443                 bool termination;
 444                 docstring rem;
 445                 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
 446                                 Encodings::TEXT_CMD, termination, rem);
 447                 if (!cnvtd.empty()) {
 448                         // it did, so we'll take that bit and proceed with what's left
 449                         ret += cnvtd;
 450                         val = rem;
 451                         continue;
 452                 }
 453                 // it's a command of some sort
 454                 scanning_cmd = true;
 455                 escaped = true;
 456                 val = val.substr(1);
 457         }
 458         return ret;
 459 }
 460
 461
 462 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
 463 docstring processRichtext(docstring const & str, bool richtext)
 464 {
 465         docstring val = str;
 466         docstring ret;
 467
 468         bool scanning_rich = false;
 469         while (!val.empty()) {
 470                 char_type const ch = val[0];
 471                 if (ch == '{' && val.size() > 1 && val[1] == '!') {
 472                         // beginning of rich text
 473                         scanning_rich = true;
 474                         val = val.substr(2);
 475                         continue;
 476                 }
 477                 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
 478                         // end of rich text
 479                         scanning_rich = false;
 480                         val = val.substr(2);
 481                         continue;
 482                 }
 483                 if (richtext) {
 484                         if (scanning_rich)
 485                                 ret += ch;
 486                         else {
 487                                 // we need to escape '<' and '>'
 488                                 if (ch == '<')
 489                                         ret += "&lt;";
 490                                 else if (ch == '>')
 491                                         ret += "&gt;";
 492                                 else
 493                                         ret += ch;
 494                         }
 495                 } else if (!scanning_rich /* && !richtext */)
 496                         ret += ch;
 497                 // else the character is discarded, which will happen only if
 498                 // richtext == false and we are scanning rich text
 499                 val = val.substr(1);
 500         }
 501         return ret;
 502 }
 503
 504 } // namespace
 505
 506
 507 //////////////////////////////////////////////////////////////////////
 508 //
 509 // BibTeXInfo
 510 //
 511 //////////////////////////////////////////////////////////////////////
 512
 513 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 514         : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
 515           info_(), format_(), modifier_(0)
 516 {}
 517
 518
 519
 520 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
 521                                           bool full, bool forceshort) const
 522 {
 523         docstring author = operator[]("author");
 524         if (author.empty())
 525                 author = operator[]("editor");
 526
 527         return getAuthorList(buf, author, full, forceshort);
 528 }
 529
 530
 531 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
 532                 docstring const & author, bool const full, bool const forceshort,
 533                 bool const allnames, bool const beginning) const
 534 {
 535         // Maxnames treshold depend on engine
 536         size_t maxnames = buf ?
 537                 buf->params().documentClass().max_citenames() : 2;
 538
 539         if (!is_bibtex_) {
 540                 docstring const opt = label();
 541                 if (opt.empty())
 542                         return docstring();
 543
 544                 docstring authors;
 545                 docstring const remainder = trim(split(opt, authors, '('));
 546                 if (remainder.empty())
 547                         // in this case, we didn't find a "(",
 548                         // so we don't have author (year)
 549                         return docstring();
 550                 if (full) {
 551                         // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
 552                         docstring const fullauthors = trim(rsplit(remainder, ')'));
 553                         if (!fullauthors.empty())
 554                                 return fullauthors;
 555                 }
 556                 return authors;
 557         }
 558
 559         if (author.empty())
 560                 return author;
 561
 562         // OK, we've got some names. Let's format them.
 563         // Try to split the author list
 564         vector<docstring> const authors = getAuthors(author);
 565
 566         docstring retval;
 567
 568         CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
 569                                                : ENGINE_TYPE_DEFAULT;
 570
 571         // These are defined in the styles
 572         string const etal =
 573                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
 574                     : " et al.";
 575         string const namesep =
 576                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
 577                    : ", ";
 578         string const lastnamesep =
 579                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
 580                     : ", and ";
 581         string const pairnamesep =
 582                 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
 583                      : " and ";
 584         string firstnameform =
 585                         buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
 586                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 587         if (!beginning)
 588                 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
 589                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 590         string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
 591                              : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
 592         if (!beginning)
 593                 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
 594                                              : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
 595         string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
 596                              : "{%prefix%[[%prefix% ]]}%surname%";
 597
 598         // Shorten the list (with et al.) if forceshort is set
 599         // and the list can actually be shortened, else if maxcitenames
 600         // is passed and full is not set.
 601         bool shorten = forceshort && authors.size() > 1;
 602         vector<docstring>::const_iterator it = authors.begin();
 603         vector<docstring>::const_iterator en = authors.end();
 604         for (size_t i = 0; it != en; ++it, ++i) {
 605                 if (i >= maxnames && !full) {
 606                         shorten = true;
 607                         break;
 608                 }
 609                 if (*it == "others") {
 610                         retval += buf ? buf->B_(etal) : from_ascii(etal);
 611                         break;
 612                 }
 613                 if (i > 0 && i == authors.size() - 1) {
 614                         if (authors.size() == 2)
 615                                 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
 616                         else
 617                                 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
 618                 } else if (i > 0)
 619                         retval += buf ? buf->B_(namesep) : from_ascii(namesep);
 620                 if (allnames)
 621                         retval += (i == 0) ? constructName(*it, firstnameform)
 622                                 : constructName(*it, othernameform);
 623                 else
 624                         retval += constructName(*it, citenameform);
 625         }
 626         if (shorten) {
 627                 if (allnames)
 628                         retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 629                 else
 630                         retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
 631         }
 632
 633         return convertLaTeXCommands(retval);
 634 }
 635
 636
 637 docstring const BibTeXInfo::getYear() const
 638 {
 639         if (is_bibtex_) {
 640                 // first try legacy year field
 641                 docstring year = operator[]("year");
 642                 if (!year.empty())
 643                         return year;
 644                 // now try biblatex's date field
 645                 year = operator[]("date");
 646                 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
 647                 // We only want the years.
 648                 static regex const yreg("[-]?([\\d]{4}).*");
 649                 static regex const ereg(".*/[-]?([\\d]{4}).*");
 650                 smatch sm;
 651                 string const date = to_utf8(year);
 652                 if (!regex_match(date, sm, yreg))
 653                         // cannot parse year.
 654                         return docstring();
 655                 year = from_ascii(sm[1]);
 656                 // check for an endyear
 657                 if (regex_match(date, sm, ereg))
 658                         year += char_type(0x2013) + from_ascii(sm[1]);
 659                 return year;
 660         }
 661
 662         docstring const opt = label();
 663         if (opt.empty())
 664                 return docstring();
 665
 666         docstring authors;
 667         docstring tmp = split(opt, authors, '(');
 668         if (tmp.empty())
 669                 // we don't have author (year)
 670                 return docstring();
 671         docstring year;
 672         tmp = split(tmp, year, ')');
 673         return year;
 674 }
 675
 676
 677 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
 678 {
 679         if (is_bibtex_) {
 680                 // get "doi" entry from citation record
 681                 doi = operator[]("doi");
 682                 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
 683                         doi = "https://doi.org/" + doi;
 684                 // get "url" entry from citation record
 685                 url = operator[]("url");
 686                 // get "file" entry from citation record
 687                 file = operator[]("file");
 688
 689                 // Jabref case, field has a format:
 690                 // Description:Location:Filetype;Description:Location:Filetype...
 691                 // We will grab only first pdf
 692                 if (!file.empty()) {
 693                         docstring ret, filedest, tmp;
 694                         ret = split(file, tmp, ':');
 695                         tmp = split(ret, filedest, ':');
 696                         //TODO howto deal with relative directories?
 697                         FileName f(to_utf8(filedest));
 698                         if (f.exists())
 699                                 file = "file:///" + filedest;
 700                 }
 701
 702                 // kbibtex case, format:
 703                 // file1.pdf;file2.pdf
 704                 // We will grab only first pdf
 705                 docstring kfile;
 706                 if (file.empty())
 707                         kfile = operator[]("localfile");
 708                 if (!kfile.empty()) {
 709                         docstring filedest, tmp;
 710                         tmp = split(kfile, filedest, ';');
 711                         //TODO howto deal with relative directories?
 712                         FileName f(to_utf8(filedest));
 713                         if (f.exists())
 714                                 file = "file:///" + filedest;
 715                 }
 716
 717                 if (!url.empty())
 718                         return;
 719
 720                 // try biblatex specific fields, see its manual
 721                 // 3.13.7 "Electronic Publishing Informationl"
 722                 docstring eprinttype = operator[]("eprinttype");
 723                 docstring eprint = operator[]("eprint");
 724                 if (eprint.empty())
 725                         return;
 726
 727                 if (eprinttype == "arxiv")
 728                         url = "https://arxiv.org/abs/" + eprint;
 729                 if (eprinttype == "jstor")
 730                         url = "https://www.jstor.org/stable/" + eprint;
 731                 if (eprinttype == "pubmed")
 732                         url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
 733                 if (eprinttype == "hdl")
 734                         url = "https://hdl.handle.net/" + eprint;
 735                 if (eprinttype == "googlebooks")
 736                         url = "http://books.google.com/books?id=" + eprint;
 737
 738                 return;
 739         }
 740
 741         // Here can be handled the bibliography environment. All one could do
 742         // here is let LyX scan the entry for URL or HRef insets.
 743 }
 744
 745
 746 namespace {
 747
// Forward declaration: parseEmbeddedOption() right below calls
// parseOptions() before its definition, which follows further down.
// See that definition for the meaning of the parameters.
docstring parseOptions(docstring const & format, string & optkey,
                    docstring & ifpart, docstring & elsepart);
 750
 751 // Calls parseOptions to deal with an embedded option, such as:
 752 //   {%number%[[, no.~%number%]]}
 753 // which must appear at the start of format. ifelsepart gets the
 754 // whole of the option, and we return what's left after the option.
 755 // we return format if there is an error.
 756 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
 757 {
 758         LASSERT(format[0] == '{' && format[1] == '%', return format);
 759         string optkey;
 760         docstring ifpart;
 761         docstring elsepart;
 762         docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
 763         if (format == rest) { // parse error
 764                 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
 765                 return format;
 766         }
 767         LASSERT(rest.size() <= format.size(),
 768                 { ifelsepart = docstring(); return format; });
 769         ifelsepart = format.substr(0, format.size() - rest.size());
 770         return rest;
 771 }
 772
 773
 774 // Gets a "clause" from a format string, where the clause is
 775 // delimited by '[[' and ']]'. Returns what is left after the
 776 // clause is removed, and returns format if there is an error.
 777 docstring getClause(docstring const & format, docstring & clause)
 778 {
 779         docstring fmt = format;
 780         // remove '[['
 781         fmt = fmt.substr(2);
 782         // we'll remove characters from the front of fmt as we
 783         // deal with them
 784         while (!fmt.empty()) {
 785                 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
 786                         // that's the end
 787                         fmt = fmt.substr(2);
 788                         break;
 789                 }
 790                 // check for an embedded option
 791                 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
 792                         docstring part;
 793                         docstring const rest = parseEmbeddedOption(fmt, part);
 794                         if (fmt == rest) {
 795                                 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
 796                                 return format;
 797                         }
 798                         clause += part;
 799                         fmt = rest;
 800                 } else { // it's just a normal character
 801                                 clause += fmt[0];
 802                                 fmt = fmt.substr(1);
 803                 }
 804         }
 805         return fmt;
 806 }
 807
 808
 809 // parse an options string, which must appear at the start of the
 810 // format parameter. puts the parsed bits in optkey, ifpart, and
 811 // elsepart and returns what's left after the option is removed.
 812 // if there's an error, it returns format itself.
 813 docstring parseOptions(docstring const & format, string & optkey,
 814                     docstring & ifpart, docstring & elsepart)
 815 {
 816         LASSERT(format[0] == '{' && format[1] == '%', return format);
 817         // strip '{%'
 818         docstring fmt = format.substr(2);
 819         size_t pos = fmt.find('%'); // end of key
 820         if (pos == string::npos) {
 821                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
 822                 return format;
 823         }
 824         optkey = to_utf8(fmt.substr(0, pos));
 825         fmt = fmt.substr(pos + 1);
 826         // [[format]] should be next
 827         if (fmt[0] != '[' || fmt[1] != '[') {
 828                 LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
 829                 return format;
 830         }
 831
 832         docstring curfmt = fmt;
 833         fmt = getClause(curfmt, ifpart);
 834         if (fmt == curfmt) {
 835                 LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
 836                 return format;
 837         }
 838
 839         if (fmt[0] == '}') // we're done, no else clause
 840                 return fmt.substr(1);
 841
 842         // else part should follow
 843         if (fmt[0] != '[' || fmt[1] != '[') {
 844                 LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
 845                 return format;
 846         }
 847
 848         curfmt = fmt;
 849         fmt = getClause(curfmt, elsepart);
 850         // we should be done
 851         if (fmt == curfmt || fmt[0] != '}') {
 852                 LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
 853                 return format;
 854         }
 855         return fmt.substr(1);
 856 }
 857
 858
 859 } // namespace
 860
 861 /* FIXME
 862 Bug #9131 revealed an oddity in how we are generating citation information
 863 when more than one key is given. We end up building a longer and longer format
 864 string as we go, which we then have to re-parse, over and over and over again,
 865 rather than generating the information for the individual keys and then putting
 866 all of that together. We do that to deal with the way separators work, from what
 867 I can tell, but it still feels like a hack. Fixing this would require quite a
 868 bit of work, however.
 869 */
// Expands a citation format string for this entry and returns the result.
// The format is consumed from the front, character by character:
//   %key%      is replaced by a value. Keys starting with '!' are macros
//              whose definition is put back in front of the remaining
//              format and parsed again; keys starting with "B_" or '_'
//              are looked up as macros and translated; all other keys
//              go through getValueForKey().
//   {%key%[[if]][[else]]}
//              is a conditional, see parseOptions(). "next" and "second"
//              are decided by the parameters of the same name, any other
//              key by whether getValueForKey() yields a non-empty value.
//   {! ... !}  delimits rich text, whose delimiters are copied to the
//              output as they are.
// Everything else is copied to the output literally.
// counter is increased by one for every macro expansion; when it exceeds
// max_passes the expansion is aborted. On any error the translated string
// "ERROR!" is returned.
docstring BibTeXInfo::expandFormat(docstring const & format,
                BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
                CiteItem const & ci, bool next, bool second) const
{
        // incorrect use of macros could put us in an infinite loop
        static int const max_passes = 5000;
        // the use of overly large keys can lead to performance problems, due
        // to eventual attempts to convert LaTeX macros to unicode. See bug
        // #8944. By default, the size is limited to 128 (in CiteItem), but
        // for specific purposes (such as XHTML export), it needs to be enlarged
        // This is perhaps not the best solution, but it will have to do for now.
        size_t const max_keysize = ci.max_key_size;
        odocstringstream ret; // return value
        // name of the key currently being read (between two '%')
        string key;
        // true while we are between the opening and the closing '%' of a key
        bool scanning_key = false;
        // true while we are between '{!' and '!}'
        bool scanning_rich = false;

        CiteEngineType const engine_type = buf.params().citeEngineType();
        docstring fmt = format;
        // we'll remove characters from the front of fmt as we
        // deal with them
        while (!fmt.empty()) {
                // counter only grows when a macro is expanded (see below)
                if (counter > max_passes) {
                        LYXERR0("Recursion limit reached while parsing `"
                                << format << "'.");
                        return _("ERROR!");
                }

                char_type thischar = fmt[0];
                if (thischar == '%') {
                        // beginning or end of key
                        if (scanning_key) {
                                // end of key
                                scanning_key = false;
                                // so we replace the key with its value, which may be empty
                                if (key[0] == '!') {
                                        // macro: its definition replaces the closing '%'
                                        // at the front of fmt and is parsed in turn
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        fmt = from_utf8(val) + fmt.substr(1);
                                        counter += 1;
                                        continue;
                                } else if (prefixIs(key, "B_")) {
                                        // a translatable bit (to the Buffer language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val), buf.params().language->code());
                                        ret << trans;
                                } else if (key[0] == '_') {
                                        // a translatable bit (to the GUI language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val));
                                        ret << trans;
                                } else {
                                        // an ordinary key: outside of rich text, its value
                                        // is wrapped in rich text span markers that carry
                                        // the key name as class
                                        docstring const val =
                                                getValueForKey(key, buf, ci, xrefs, max_keysize);
                                        if (!scanning_rich)
                                                ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
                                        ret << val;
                                        if (!scanning_rich)
                                                ret << from_ascii("{!</span>!}");
                                }
                        } else {
                                // beginning of key
                                key.clear();
                                scanning_key = true;
                        }
                }
                else if (thischar == '{') {
                        // beginning of option?
                        if (scanning_key) {
                                LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
                                return _("ERROR!");
                        }
                        if (fmt.size() > 1) {
                                if (fmt[1] == '%') {
                                        // it is the beginning of an optional format
                                        string optkey;
                                        docstring ifpart;
                                        docstring elsepart;
                                        docstring const newfmt =
                                                parseOptions(fmt, optkey, ifpart, elsepart);
                                        if (newfmt == fmt) // parse error
                                                return _("ERROR!");
                                        fmt = newfmt;
                                        docstring const val =
                                                getValueForKey(optkey, buf, ci, xrefs);
                                        // The clauses are expanded with a fresh counter. The
                                        // nested calls do not pass second on, so it takes
                                        // whatever default the declaration gives it.
                                        if (optkey == "next" && next)
                                                ret << ifpart; // without expansion
                                        else if (optkey == "second" && second) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!val.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
                                                        ci, next);
                                        } else if (!elsepart.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(elsepart, xrefs, newcounter, buf,
                                                        ci, next);
                                        }
                                        // fmt will have been shortened for us already
                                        continue;
                                }
                                if (fmt[1] == '!') {
                                        // beginning of rich text
                                        scanning_rich = true;
                                        fmt = fmt.substr(2);
                                        ret << from_ascii("{!");
                                        continue;
                                }
                        }
                        // we are here if '{' was not followed by % or !.
                        // So it's just a character.
                        ret << thischar;
                }
                else if (scanning_rich && thischar == '!'
                         && fmt.size() > 1 && fmt[1] == '}') {
                        // end of rich text
                        scanning_rich = false;
                        fmt = fmt.substr(2);
                        ret << from_ascii("!}");
                        continue;
                }
                else if (scanning_key)
                        // one more character of the key name
                        key += char(thischar);
                else {
                        // a literal character; one that cannot be encoded is
                        // reported and left out
                        try {
                                ret.put(thischar);
                        } catch (EncodingException & /* e */) {
                                LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
                        }
                }
                // drop the character we have just dealt with
                fmt = fmt.substr(1);
        } // while loop
        if (scanning_key) {
                LYXERR0("Never found end of key in `" << format << "'!");
                return _("ERROR!");
        }
        if (scanning_rich) {
                LYXERR0("Never found end of rich text in `" << format << "'!");
                return _("ERROR!");
        }
        return ret.str();
}
1019
1020
1021 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
1022         Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
1023 {
1024         bool const richtext = ci.richtext;
1025
1026         CiteEngineType const engine_type = buf.params().citeEngineType();
1027         DocumentClass const & dc = buf.params().documentClass();
1028         docstring const & format = format_in.empty()?
1029                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1030                               : format_in;
1031
1032         if (format != format_) {
1033                 // clear caches since format changed
1034                 info_.clear();
1035                 info_richtext_.clear();
1036                 format_ = format;
1037         }
1038
1039         if (!richtext && !info_.empty()) {
1040                 info_ = convertLaTeXCommands(processRichtext(info_, false));
1041                 return info_;
1042         }
1043         if (richtext && !info_richtext_.empty())
1044                 return info_richtext_;
1045
1046         if (!is_bibtex_) {
1047                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1048                 info_ = it->second;
1049                 return info_;
1050         }
1051
1052         int counter = 0;
1053         info_ = expandFormat(format, xrefs, counter, buf,
1054                 ci, false, false);
1055
1056         if (info_.empty()) {
1057                 // this probably shouldn't happen
1058                 return info_;
1059         }
1060
1061         if (richtext) {
1062                 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1063                 return info_richtext_;
1064         }
1065
1066         info_ = convertLaTeXCommands(processRichtext(info_, false));
1067         return info_;
1068 }
1069
1070
1071 docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
1072         Buffer const & buf, docstring const & format,
1073         CiteItem const & ci, bool next, bool second) const
1074 {
1075         docstring loclabel;
1076
1077         int counter = 0;
1078         loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1079
1080         if (!loclabel.empty() && !next) {
1081                 loclabel = processRichtext(loclabel, ci.richtext);
1082                 loclabel = convertLaTeXCommands(loclabel);
1083         }
1084
1085         return loclabel;
1086 }
1087
1088
1089 docstring const & BibTeXInfo::operator[](docstring const & field) const
1090 {
1091         BibTeXInfo::const_iterator it = find(field);
1092         if (it != end())
1093                 return it->second;
1094         static docstring const empty_value = docstring();
1095         return empty_value;
1096 }
1097
1098
1099 docstring const & BibTeXInfo::operator[](string const & field) const
1100 {
1101         return operator[](from_ascii(field));
1102 }
1103
1104
1105 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1106         CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1107 {
1108         // anything less is pointless
1109         LASSERT(maxsize >= 16, maxsize = 16);
1110         string key = oldkey;
1111         bool cleanit = false;
1112         if (prefixIs(oldkey, "clean:")) {
1113                 key = oldkey.substr(6);
1114                 cleanit = true;
1115         }
1116
1117         docstring ret = operator[](key);
1118         if (ret.empty() && !xrefs.empty()) {
1119                 // xr is a (reference to a) BibTeXInfo const *
1120                 for (auto const & xr : xrefs) {
1121                         if (xr && !(*xr)[key].empty()) {
1122                                 ret = (*xr)[key];
1123                                 break;
1124                         }
1125                 }
1126         }
1127         if (ret.empty()) {
1128                 // some special keys
1129                 // FIXME: dialog, textbefore and textafter have nothing to do with this
1130                 if (key == "dialog" && ci.context == CiteItem::Dialog)
1131                         ret = from_ascii("x"); // any non-empty string will do
1132                 else if (key == "export" && ci.context == CiteItem::Export)
1133                         ret = from_ascii("x"); // any non-empty string will do
1134                 else if (key == "ifstar" && ci.Starred)
1135                         ret = from_ascii("x"); // any non-empty string will do
1136                 else if (key == "ifqualified" && ci.isQualified)
1137                         ret = from_ascii("x"); // any non-empty string will do
1138                 else if (key == "entrytype")
1139                         ret = entry_type_;
1140                 else if (prefixIs(key, "ifentrytype:")
1141                          && from_ascii(key.substr(12)) == entry_type_)
1142                         ret = from_ascii("x"); // any non-empty string will do
1143                 else if (key == "key")
1144                         ret = bib_key_;
1145                 else if (key == "label")
1146                         ret = label_;
1147                 else if (key == "modifier" && modifier_ != 0)
1148                         ret = modifier_;
1149                 else if (key == "numericallabel")
1150                         ret = cite_number_;
1151                 else if (prefixIs(key, "ifmultiple:")) {
1152                         // Return whether we have multiple authors
1153                         docstring const kind = operator[](from_ascii(key.substr(11)));
1154                         if (multipleAuthors(kind))
1155                                 ret = from_ascii("x"); // any non-empty string will do
1156                 }
1157                 else if (prefixIs(key, "abbrvnames:")) {
1158                         // Special key to provide abbreviated name list,
1159                         // with respect to maxcitenames. Suitable for Bibliography
1160                         // beginnings.
1161                         docstring const kind = operator[](from_ascii(key.substr(11)));
1162                         ret = getAuthorList(&buf, kind, false, false, true);
1163                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1164                                 ret[0] = uppercase(ret[0]);
1165                 } else if (prefixIs(key, "fullnames:")) {
1166                         // Return a full name list. Suitable for Bibliography
1167                         // beginnings.
1168                         docstring const kind = operator[](from_ascii(key.substr(10)));
1169                         ret = getAuthorList(&buf, kind, true, false, true);
1170                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1171                                 ret[0] = uppercase(ret[0]);
1172                 } else if (prefixIs(key, "forceabbrvnames:")) {
1173                         // Special key to provide abbreviated name lists,
1174                         // irrespective of maxcitenames. Suitable for Bibliography
1175                         // beginnings.
1176                         docstring const kind = operator[](from_ascii(key.substr(15)));
1177                         ret = getAuthorList(&buf, kind, false, true, true);
1178                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1179                                 ret[0] = uppercase(ret[0]);
1180                 } else if (prefixIs(key, "abbrvbynames:")) {
1181                         // Special key to provide abbreviated name list,
1182                         // with respect to maxcitenames. Suitable for further names inside a
1183                         // bibliography item // (such as "ed. by ...")
1184                         docstring const kind = operator[](from_ascii(key.substr(11)));
1185                         ret = getAuthorList(&buf, kind, false, false, true, false);
1186                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1187                                 ret[0] = uppercase(ret[0]);
1188                 } else if (prefixIs(key, "fullbynames:")) {
1189                         // Return a full name list. Suitable for further names inside a
1190                         // bibliography item // (such as "ed. by ...")
1191                         docstring const kind = operator[](from_ascii(key.substr(10)));
1192                         ret = getAuthorList(&buf, kind, true, false, true, false);
1193                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1194                                 ret[0] = uppercase(ret[0]);
1195                 } else if (prefixIs(key, "forceabbrvbynames:")) {
1196                         // Special key to provide abbreviated name lists,
1197                         // irrespective of maxcitenames. Suitable for further names inside a
1198                         // bibliography item // (such as "ed. by ...")
1199                         docstring const kind = operator[](from_ascii(key.substr(15)));
1200                         ret = getAuthorList(&buf, kind, false, true, true, false);
1201                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1202                                 ret[0] = uppercase(ret[0]);
1203                 } else if (key == "abbrvciteauthor") {
1204                         // Special key to provide abbreviated author or
1205                         // editor names (suitable for citation labels),
1206                         // with respect to maxcitenames.
1207                         ret = getAuthorOrEditorList(&buf, false, false);
1208                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1209                                 ret[0] = uppercase(ret[0]);
1210                 } else if (key == "fullciteauthor") {
1211                         // Return a full author or editor list (for citation labels)
1212                         ret = getAuthorOrEditorList(&buf, true, false);
1213                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1214                                 ret[0] = uppercase(ret[0]);
1215                 } else if (key == "forceabbrvciteauthor") {
1216                         // Special key to provide abbreviated author or
1217                         // editor names (suitable for citation labels),
1218                         // irrespective of maxcitenames.
1219                         ret = getAuthorOrEditorList(&buf, false, true);
1220                         if (ci.forceUpperCase && isLowerCase(ret[0]))
1221                                 ret[0] = uppercase(ret[0]);
1222                 } else if (key == "bibentry") {
1223                         // Special key to provide the full bibliography entry: see getInfo()
1224                         CiteEngineType const engine_type = buf.params().citeEngineType();
1225                         DocumentClass const & dc = buf.params().documentClass();
1226                         docstring const & format =
1227                                 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1228                         int counter = 0;
1229                         ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1230                 } else if (key == "textbefore")
1231                         ret = ci.textBefore;
1232                 else if (key == "textafter")
1233                         ret = ci.textAfter;
1234                 else if (key == "curpretext") {
1235                         vector<pair<docstring, docstring>> pres = ci.getPretexts();
1236                         vector<pair<docstring, docstring>>::iterator it = pres.begin();
1237                         int numkey = 1;
1238                         for (; it != pres.end() ; ++it) {
1239                                 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1240                                         ret = (*it).second;
1241                                         pres.erase(it);
1242                                         break;
1243                                 }
1244                                 if ((*it).first == bib_key_)
1245                                         ++numkey;
1246                         }
1247                 } else if (key == "curposttext") {
1248                         vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1249                         vector<pair<docstring, docstring>>::iterator it = posts.begin();
1250                         int numkey = 1;
1251                         for (; it != posts.end() ; ++it) {
1252                                 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1253                                         ret = (*it).second;
1254                                         posts.erase(it);
1255                                         break;
1256                                 }
1257                                 if ((*it).first == bib_key_)
1258                                         ++numkey;
1259                         }
1260                 } else if (key == "year")
1261                         ret = getYear();
1262         }
1263
1264         if (cleanit)
1265                 ret = xml::cleanAttr(ret);
1266
1267         // make sure it is not too big
1268         support::truncateWithEllipsis(ret, maxsize);
1269         return ret;
1270 }
1271
1272
1273 //////////////////////////////////////////////////////////////////////
1274 //
1275 // BiblioInfo
1276 //
1277 //////////////////////////////////////////////////////////////////////
1278
1279 namespace {
1280
1281 // A functor for use with sort, leading to case insensitive sorting
1282 bool compareNoCase(const docstring & a, const docstring & b) {
1283         return compare_no_case(a, b) < 0;
1284 }
1285
1286 } // namespace
1287
1288
1289 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1290 {
1291         vector<docstring> result;
1292         if (!data.isBibTeX())
1293                 return result;
1294         // Legacy crossref field. This is not nestable.
1295         if (!nested && !data["crossref"].empty()) {
1296                 docstring const xrefkey = data["crossref"];
1297                 result.push_back(xrefkey);
1298                 // However, check for nested xdatas
1299                 BiblioInfo::const_iterator it = find(xrefkey);
1300                 if (it != end()) {
1301                         BibTeXInfo const & xref = it->second;
1302                         vector<docstring> const nxdata = getXRefs(xref, true);
1303                         if (!nxdata.empty())
1304                                 result.insert(result.end(), nxdata.begin(), nxdata.end());
1305                 }
1306         }
1307         // Biblatex's xdata field. Infinitely nestable.
1308         // XData field can consist of a comma-separated list of keys
1309         vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1310         if (!xdatakeys.empty()) {
1311                 for (auto const & xdatakey : xdatakeys) {
1312                         result.push_back(xdatakey);
1313                         BiblioInfo::const_iterator it = find(xdatakey);
1314                         if (it != end()) {
1315                                 BibTeXInfo const & xdata = it->second;
1316                                 vector<docstring> const nxdata = getXRefs(xdata, true);
1317                                 if (!nxdata.empty())
1318                                         result.insert(result.end(), nxdata.begin(), nxdata.end());
1319                         }
1320                 }
1321         }
1322         return result;
1323 }
1324
1325
1326 vector<docstring> const BiblioInfo::getKeys() const
1327 {
1328         vector<docstring> bibkeys;
1329         for (auto const & bi : *this)
1330                 bibkeys.push_back(bi.first);
1331         sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1332         return bibkeys;
1333 }
1334
1335
1336 vector<docstring> const BiblioInfo::getFields() const
1337 {
1338         vector<docstring> bibfields;
1339         for (auto const & fn : field_names_)
1340                 bibfields.push_back(fn);
1341         sort(bibfields.begin(), bibfields.end());
1342         return bibfields;
1343 }
1344
1345
1346 vector<docstring> const BiblioInfo::getEntries() const
1347 {
1348         vector<docstring> bibentries;
1349         for (auto const & et : entry_types_)
1350                 bibentries.push_back(et);
1351         sort(bibentries.begin(), bibentries.end());
1352         return bibentries;
1353 }
1354
1355
1356 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1357 {
1358         BiblioInfo::const_iterator it = find(key);
1359         if (it == end())
1360                 return docstring();
1361         BibTeXInfo const & data = it->second;
1362         return data.getAuthorOrEditorList(&buf, false);
1363 }
1364
1365
1366 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1367 {
1368         BiblioInfo::const_iterator it = find(key);
1369         if (it == end())
1370                 return docstring();
1371         BibTeXInfo const & data = it->second;
1372         return data.citeNumber();
1373 }
1374
1375 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1376 {
1377         BiblioInfo::const_iterator it = find(key);
1378          if (it == end())
1379                 return;
1380         BibTeXInfo const & data = it->second;
1381         data.getLocators(doi,url,file);
1382 }
1383
1384
1385 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1386 {
1387         BiblioInfo::const_iterator it = find(key);
1388         if (it == end())
1389                 return docstring();
1390         BibTeXInfo const & data = it->second;
1391         docstring year = data.getYear();
1392         if (year.empty()) {
1393                 // let's try the crossrefs
1394                 vector<docstring> const xrefs = getXRefs(data);
1395                 if (xrefs.empty())
1396                         // no luck
1397                         return docstring();
1398                 for (docstring const & xref : xrefs) {
1399                         BiblioInfo::const_iterator const xrefit = find(xref);
1400                         if (xrefit == end())
1401                                 continue;
1402                         BibTeXInfo const & xref_data = xrefit->second;
1403                         year = xref_data.getYear();
1404                         if (!year.empty())
1405                                 // success!
1406                                 break;
1407                 }
1408         }
1409         if (use_modifier && data.modifier() != 0)
1410                 year += data.modifier();
1411         return year;
1412 }
1413
1414
1415 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1416 {
1417         docstring const year = getYear(key, use_modifier);
1418         if (year.empty())
1419                 return buf.B_("No year");
1420         return year;
1421 }
1422
1423
1424 docstring const BiblioInfo::getInfo(docstring const & key,
1425         Buffer const & buf, CiteItem const & ci, docstring const & format) const
1426 {
1427         BiblioInfo::const_iterator it = find(key);
1428         if (it == end())
1429                 return _("Bibliography entry not found!");
1430         BibTeXInfo const & data = it->second;
1431         BibTeXInfoList xrefptrs;
1432         for (docstring const & xref : getXRefs(data)) {
1433                 BiblioInfo::const_iterator const xrefit = find(xref);
1434                 if (xrefit != end())
1435                         xrefptrs.push_back(&(xrefit->second));
1436         }
1437         return data.getInfo(xrefptrs, buf, ci, format);
1438 }
1439
1440
1441 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1442         Buffer const & buf, string const & style, CiteItem const & ci) const
1443 {
1444         size_t max_size = ci.max_size;
1445         // shorter makes no sense
1446         LASSERT(max_size >= 16, max_size = 16);
1447
1448         // we can't display more than 10 of these, anyway
1449         // but since we truncate in the middle,
1450         // we need to split into two halfs.
1451         bool const too_many_keys = keys.size() > 10;
1452         vector<docstring> lkeys;
1453         if (too_many_keys) {
1454                 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
1455                 keys.resize(5);
1456                 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1457         }
1458
1459         CiteEngineType const engine_type = buf.params().citeEngineType();
1460         DocumentClass const & dc = buf.params().documentClass();
1461         docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1462         docstring ret = format;
1463         vector<docstring>::const_iterator key = keys.begin();
1464         vector<docstring>::const_iterator ken = keys.end();
1465         vector<docstring> handled_keys;
1466         for (int i = 0; key != ken; ++key, ++i) {
1467                 handled_keys.push_back(*key);
1468                 int n = 0;
1469                 for (auto const & k : handled_keys) {
1470                         if (k == *key)
1471                                 ++n;
1472                 }
1473                 BiblioInfo::const_iterator it = find(*key);
1474                 BibTeXInfo empty_data;
1475                 empty_data.key(*key);
1476                 BibTeXInfo & data = empty_data;
1477                 vector<BibTeXInfo const *> xrefptrs;
1478                 if (it != end()) {
1479                         data = it->second;
1480                         for (docstring const & xref : getXRefs(data)) {
1481                                 BiblioInfo::const_iterator const xrefit = find(xref);
1482                                 if (xrefit != end())
1483                                         xrefptrs.push_back(&(xrefit->second));
1484                         }
1485                 }
1486                 data.numKey(n);
1487                 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1488         }
1489
1490         support::truncateWithEllipsis(ret, max_size, true);
1491
1492         return ret;
1493 }
1494
1495
1496 bool BiblioInfo::isBibtex(docstring const & key) const
1497 {
1498         docstring key1;
1499         split(key, key1, ',');
1500         BiblioInfo::const_iterator it = find(key1);
1501         if (it == end())
1502                 return false;
1503         return it->second.isBibTeX();
1504 }
1505
1506
1507 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1508         vector<docstring> const & keys, vector<CitationStyle> const & styles,
1509         Buffer const & buf, CiteItem const & ci) const
1510 {
1511         if (empty())
1512                 return vector<pair<docstring,docstring>>();
1513
1514         string style;
1515         CiteStringMap csm(styles.size());
1516         for (size_t i = 0; i != csm.size(); ++i) {
1517                 style = styles[i].name;
1518                 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1519         }
1520
1521         return csm;
1522 }
1523
1524
1525 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1526 {
1527         bimap_.insert(info.begin(), info.end());
1528         field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1529         entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1530 }
1531
1532
1533 namespace {
1534
1535 // used in xhtml to sort a list of BibTeXInfo objects
1536 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1537 {
1538         docstring const lauth = lhs->getAuthorOrEditorList();
1539         docstring const rauth = rhs->getAuthorOrEditorList();
1540         docstring const lyear = lhs->getYear();
1541         docstring const ryear = rhs->getYear();
1542         docstring const ltitl = lhs->operator[]("title");
1543         docstring const rtitl = rhs->operator[]("title");
1544         return  (lauth < rauth)
1545                 || (lauth == rauth && lyear < ryear)
1546                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1547 }
1548
1549 } // namespace
1550
1551
1552 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1553 {
1554         cited_entries_.clear();
1555         // We are going to collect all the citation keys used in the document,
1556         // getting them from the TOC.
1557         // FIXME We may want to collect these differently, in the first case,
1558         // so that we might have them in order of appearance.
1559         set<docstring> citekeys;
1560         Toc const & toc = *buf.tocBackend().toc("citation");
1561         for (auto const & t : toc) {
1562                 if (t.str().empty())
1563                         continue;
1564                 vector<docstring> const keys = getVectorFromString(t.str());
1565                 citekeys.insert(keys.begin(), keys.end());
1566         }
1567         if (citekeys.empty())
1568                 return;
1569
1570         // We have a set of the keys used in this document.
1571         // We will now convert it to a list of the BibTeXInfo objects used in
1572         // this document...
1573         vector<BibTeXInfo const *> bi;
1574         for (auto const & ck : citekeys) {
1575                 BiblioInfo::const_iterator const bt = find(ck);
1576                 if (bt == end() || !bt->second.isBibTeX())
1577                         continue;
1578                 bi.push_back(&(bt->second));
1579         }
1580         // ...and sort it.
1581         sort(bi.begin(), bi.end(), lSorter);
1582
1583         // Now we can write the sorted keys
1584         // b is a BibTeXInfo const *
1585         for (auto const & b : bi)
1586                 cited_entries_.push_back(b->key());
1587 }
1588
1589
1590 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1591 {
1592         collectCitedEntries(buf);
1593         CiteEngineType const engine_type = buf.params().citeEngineType();
1594         bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1595
1596         int keynumber = 0;
1597         char modifier = 0;
1598         // used to remember the last one we saw
1599         // we'll be comparing entries to see if we need to add
1600         // modifiers, like "1984a"
1601         map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1602
1603         // add letters to years
1604         for (auto const & ce : cited_entries_) {
1605                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1606                 // this shouldn't happen, but...
1607                 if (biit == bimap_.end())
1608                         // ...fail gracefully, anyway.
1609                         continue;
1610                 BibTeXInfo & entry = biit->second;
1611                 if (numbers) {
1612                         docstring const num = convert<docstring>(++keynumber);
1613                         entry.setCiteNumber(num);
1614                 } else {
1615                         // The first test here is checking whether this is the first
1616                         // time through the loop. If so, then we do not have anything
1617                         // with which to compare.
1618                         if (last != bimap_.end()
1619                             && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1620                             // we access the year via getYear() so as to get it from the xref,
1621                             // if we need to do so
1622                             && getYear(entry.key()) == getYear(last->second.key())) {
1623                                 if (modifier == 0) {
1624                                         // so the last one should have been 'a'
1625                                         last->second.setModifier('a');
1626                                         modifier = 'b';
1627                                 } else if (modifier == 'z')
1628                                         modifier = 'A';
1629                                 else
1630                                         modifier++;
1631                         } else {
1632                                 modifier = 0;
1633                         }
1634                         entry.setModifier(modifier);
1635                         // remember the last one
1636                         last = biit;
1637                 }
1638         }
1639         // Set the labels
1640         for (auto const & ce : cited_entries_) {
1641                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1642                 // this shouldn't happen, but...
1643                 if (biit == bimap_.end())
1644                         // ...fail gracefully, anyway.
1645                         continue;
1646                 BibTeXInfo & entry = biit->second;
1647                 if (numbers) {
1648                         entry.label(entry.citeNumber());
1649                 } else {
1650                         docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1651                         // we do it this way so as to access the xref, if necessary
1652                         // note that this also gives us the modifier
1653                         docstring const year = getYear(ce, buf, true);
1654                         if (!auth.empty() && !year.empty())
1655                                 entry.label(auth + ' ' + year);
1656                         else
1657                                 entry.label(entry.key());
1658                 }
1659         }
1660 }
1661
1662
1663 //////////////////////////////////////////////////////////////////////
1664 //
1665 // CitationStyle
1666 //
1667 //////////////////////////////////////////////////////////////////////
1668
1669
1670 CitationStyle citationStyleFromString(string const & command,
1671                                       BufferParams const & params)
1672 {
1673         CitationStyle cs;
1674         if (command.empty())
1675                 return cs;
1676
1677         string const alias = params.getCiteAlias(command);
1678         string cmd = alias.empty() ? command : alias;
1679         if (isUpperCase(command[0])) {
1680                 cs.forceUpperCase = true;
1681                 cmd[0] = lowercase(cmd[0]);
1682         }
1683
1684         size_t const n = command.size() - 1;
1685         if (command[n] == '*') {
1686                 cs.hasStarredVersion = true;
1687                 if (suffixIs(cmd, '*'))
1688                         cmd = cmd.substr(0, cmd.size() - 1);
1689         }
1690
1691         cs.name = cmd;
1692         return cs;
1693 }
1694
1695
1696 string citationStyleToString(const CitationStyle & cs, bool const latex)
1697 {
1698         string cmd = latex ? cs.cmd : cs.name;
1699         if (cs.forceUpperCase)
1700                 cmd[0] = uppercase(cmd[0]);
1701         if (cs.hasStarredVersion)
1702                 cmd += '*';
1703         return cmd;
1704 }
1705
1706
1707 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1708 {
1709         // This function closely mimics getAuthorList, but produces DocBook instead of text.
1710         // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1711         // as the output has a database-like shape.
1712         // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1713
1714         if (authorsString.empty()) {
1715                 return docstring();
1716         }
1717
1718         // Split the input list of authors into individual authors.
1719         vector<docstring> const authors = getAuthors(authorsString);
1720
1721         // Retrieve the "et al." variation.
1722         string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1723
1724         // Output the list of authors.
1725         xs << xml::StartTag("authorgroup");
1726         xs << xml::CR();
1727
1728         auto it = authors.cbegin();
1729         auto en = authors.cend();
1730         for (size_t i = 0; it != en; ++it, ++i) {
1731                 xs << xml::StartTag("author");
1732                 xs << xml::CR();
1733                 xs << xml::StartTag("personname");
1734                 xs << xml::CR();
1735                 docstring name = *it;
1736
1737                 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1738                 if (name == "others") {
1739                         xs << buf.B_(etal);
1740                 } else {
1741                         name_parts parts = nameParts(name);
1742                         if (! parts.prefix.empty()) {
1743                                 xs << xml::StartTag("honorific");
1744                                 xs << parts.prefix;
1745                                 xs << xml::EndTag("honorific");
1746                                 xs << xml::CR();
1747                         }
1748                         if (! parts.prename.empty()) {
1749                                 xs << xml::StartTag("firstname");
1750                                 xs << parts.prename;
1751                                 xs << xml::EndTag("firstname");
1752                                 xs << xml::CR();
1753                         }
1754                         if (! parts.surname.empty()) {
1755                                 xs << xml::StartTag("surname");
1756                                 xs << parts.surname;
1757                                 xs << xml::EndTag("surname");
1758                                 xs << xml::CR();
1759                         }
1760                         if (! parts.suffix.empty()) {
1761                                 xs << xml::StartTag("othername", "role=\"suffix\"");
1762                                 xs << parts.suffix;
1763                                 xs << xml::EndTag("othername");
1764                                 xs << xml::CR();
1765                         }
1766                 }
1767
1768                 xs << xml::EndTag("personname");
1769                 xs << xml::CR();
1770                 xs << xml::EndTag("author");
1771                 xs << xml::CR();
1772
1773                 // Could add an affiliation after <personname>, but not stored in BibTeX.
1774         }
1775         xs << xml::EndTag("authorgroup");
1776         xs << xml::CR();
1777
1778         return docstring();
1779 }
1780
1781 } // namespace lyx