]> git.lyx.org Git - lyx.git/blobdiff - src/BiblioInfo.cpp
* ca/Intro.lyx from Daniel
[lyx.git] / src / BiblioInfo.cpp
index 9a353280e3be5394c77758796ada0e9c3998ab38..36509c6c7c9557d0525acb56acd7b5fbc1ed4aca 100644 (file)
@@ -5,8 +5,9 @@
  *
  * \author Angus Leeming
  * \author Herbert Voß
- * \author Richard Heck
+ * \author Richard Kimberly Heck
  * \author Julien Rioux
+ * \author Jürgen Spitzmüller
  *
  * Full author contact details are available in file CREDITS.
  */
 #include <config.h>
 
 #include "BiblioInfo.h"
+
 #include "Buffer.h"
 #include "BufferParams.h"
-#include "buffer_funcs.h"
+#include "Citation.h"
 #include "Encoding.h"
-#include "InsetIterator.h"
 #include "Language.h"
-#include "output_xhtml.h"
-#include "Paragraph.h"
 #include "TextClass.h"
 #include "TocBackend.h"
+#include "xml.h"
 
 #include "support/convert.h"
 #include "support/debug.h"
 #include "support/docstream.h"
+#include "support/FileName.h"
 #include "support/gettext.h"
 #include "support/lassert.h"
 #include "support/lstrings.h"
-#include "support/regex.h"
 #include "support/textutils.h"
 
+#include <map>
+#include <regex>
 #include <set>
 
 using namespace std;
@@ -44,51 +46,281 @@ namespace lyx {
 
 namespace {
 
-// gets the "family name" from an author-type string
-docstring familyName(docstring const & name)
+// Replace the $$space! and $$comma! placeholders in names by a blank and a comma
+docstring renormalize(docstring const & input)
 {
-       if (name.empty())
-               return docstring();
+       docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
+       return subst(res, from_ascii("$$comma!"), from_ascii(","));
+}
+
+
+// Split a surname into its prefix ("von-part") and the family name; returns (prefix, family name)
+pair<docstring, docstring> parseSurname(docstring const & sname)
+{
+       // Split the surname into its blank-separated tokens
+       vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
+       if (pieces.size() < 2)
+               return make_pair(docstring(), sname);
+
+       // Now we look for leading pieces that begin with a lower case letter.
+       // These form the "von-part"; the very last token is never part of it.
+       docstring prefix;
+       vector<docstring>::const_iterator it = pieces.begin();
+       vector<docstring>::const_iterator const en = pieces.end();
+       bool first = true;
+       for (; it != en; ++it) {
+               if ((*it).empty())
+                       continue;
+               // If this is the last piece, then what we now have is
+               // the family name, notwithstanding the casing.
+               if (it + 1 == en)
+                       break;
+               char_type const c = (*it)[0];
+               // If the piece does not start with a lower case char, we
+               // assume the family name begins here.
+               if (!isLower(c))
+                       break;
+               // Neither of the above, so add this piece to the prefix
+               if (!first)
+                       prefix += " ";
+               else
+                       first = false;
+               prefix += *it;
+       }
+
+       // Reconstruct the family name from the remaining pieces.
+       // Note that if we left the loop because it + 1 == en,
+       // then this will still do the right thing, i.e., make surname
+       // just be the last piece.
+       docstring surname;
+       first = true;
+       for (; it != en; ++it) {
+               if (!first)
+                       surname += " ";
+               else
+                       first = false;
+               surname += *it;
+       }
+       return make_pair(prefix, surname);
+}
+
+
+struct name_parts { // the parts of a single author name, as filled in by nameParts()
+       docstring surname; // family name, without the name prefix
+       docstring prename; // prename
+       docstring suffix; // name suffix (aka jr part)
+       docstring prefix; // name prefix (aka von part)
+};
+
 
-       // first we look for a comma, and take the last name to be everything
-       // preceding the right-most one, so that we also get the "jr" part.
-       docstring::size_type idx = name.rfind(',');
-       if (idx != docstring::npos)
-               return ltrim(name.substr(0, idx));
+// gets the name parts (prename, surname, prefix, suffix) from an author-type string
+name_parts nameParts(docstring const & iname)
+{
+       name_parts res;
+       if (iname.empty())
+               return res;
+
+       // First we check for groupings (via {...}) and replace blanks and
+       // commas inside groups with temporary placeholders
+       docstring name;
+       int gl = 0;
+       docstring::const_iterator p = iname.begin();
+       while (p != iname.end()) {
+               // count grouping level
+               if (*p == '{')
+                       ++gl;
+               else if (*p == '}')
+                       --gl;
+               // generate string with probable placeholders
+               if (*p == ' ' && gl > 0)
+                       name += from_ascii("$$space!");
+               else if (*p == ',' && gl > 0)
+                       name += from_ascii("$$comma!");
+               else
+                       name += *p;
+               ++p;
+       }
 
-       // OK, so now we want to look for the last name. We're going to
-       // include the "von" part. This isn't perfect.
+       // Now we look for a comma, and take the last name to be everything
+       // preceding the right-most one, so that we also get the name suffix
+       // (aka "jr" part).
+       vector<docstring> pieces = getVectorFromString(name);
+       if (pieces.size() > 1) {
+               // Whether we have a name suffix or not, the prename is
+               // always last item
+               res.prename = renormalize(pieces.back());
+               // The family name, conversely, is always the first item.
+               // However, it might contain a prefix (aka "von" part)
+               docstring const sname = pieces.front();
+               res.prefix = renormalize(parseSurname(sname).first);
+               res.surname = renormalize(parseSurname(sname).second);
+               // If we have three pieces (the maximum allowed by BibTeX),
+               // the second one is the name suffix.
+               if (pieces.size() > 2)
+                       res.suffix = renormalize(pieces.at(1));
+               return res;
+       }
+
+       // OK, so now we want to look for the last name.
        // Split on spaces, to get various tokens.
-       vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
-       // If we only get two, assume the last one is the last name
-       if (pieces.size() <= 2)
-               return pieces.back();
+       pieces = getVectorFromString(name, from_ascii(" "));
+       // No space: Only a family name given
+       if (pieces.size() < 2) {
+               res.surname = renormalize(pieces.back());
+               return res;
+       }
+       // If we get two pieces, assume "prename surname"
+       if (pieces.size() == 2) {
+               res.prename = renormalize(pieces.front());
+               res.surname = renormalize(pieces.back());
+               return res;
+       }
 
-       // Now we look for the first token that begins with a lower case letter.
+       // Three or more pieces: A name prefix (aka "von" part) might be included.
+       // We look for the first piece that begins with a lower case letter
+       // (which is the name prefix, if it is not the last token) or the last token.
+       docstring prename;
        vector<docstring>::const_iterator it = pieces.begin();
-       vector<docstring>::const_iterator en = pieces.end();
+       vector<docstring>::const_iterator const en = pieces.end();
+       bool first = true;
        for (; it != en; ++it) {
                if ((*it).empty())
                        continue;
                char_type const c = (*it)[0];
+               // If the piece starts with a lower case char, we assume
+               // this is the name prefix and thus prename is complete.
                if (isLower(c))
                        break;
+               // Same if this is the last piece, which is always the surname.
+               if (it + 1 == en)
+                       break;
+               // Nothing of the former, so add this piece to the prename
+               if (!first)
+                       prename += " ";
+               else
+                       first = false;
+               prename += *it;
        }
 
-       if (it == en) // we never found a "von"
-               return pieces.back();
-
-       // reconstruct what we need to return
-       docstring retval;
-       bool first = true;
+       // Now reconstruct the family name and strip the prefix.
+       // Note that if we left the loop because it + 1 == en,
+       // then this will still do the right thing, i.e., make surname
+       // just be the last piece.
+       docstring surname;
+       first = true;
        for (; it != en; ++it) {
                if (!first)
-                       retval += " ";
+                       surname += " ";
                else
                        first = false;
-               retval += *it;
+               surname += *it;
        }
-       return retval;
+       res.prename = renormalize(prename);
+       res.prefix = renormalize(parseSurname(surname).first);
+       res.surname = renormalize(parseSurname(surname).second);
+       return res;
+}
+
+
+docstring constructName(docstring const & name, string const & scheme)
+{
+       // Rebuilds a name from its parts (see nameParts) according to scheme, in
+       // which %prename%, %surname%, %prefix% and %suffix% get replaced.
+       docstring const prename = nameParts(name).prename; // NOTE(review): nameParts(name) is re-parsed for each of these four fields
+       docstring const surname = nameParts(name).surname;
+       docstring const prefix = nameParts(name).prefix;
+       docstring const suffix = nameParts(name).suffix;
+       string res = scheme;
+       static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); // {%prename%[[text]]}: text is kept only if prename is non-empty
+       static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); // same for suffix
+       static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); // same for prefix
+       smatch sub;
+       // sub refers into the string given to regex_match(), so that string must not
+       // be modified while sub is still read. Hence the temporary string tmp below.
+       if (regex_match(scheme, sub, reg1)) {
+               res = sub.str(1);
+               if (!prename.empty())
+                       res += sub.str(3);
+               res += sub.str(5);
+       }
+       if (regex_match(res, sub, reg2)) {
+               string tmp = sub.str(1);
+               if (!suffix.empty())
+                       tmp += sub.str(3);
+               res = tmp + sub.str(5);
+       }
+       if (regex_match(res, sub, reg3)) {
+               string tmp = sub.str(1);
+               if (!prefix.empty())
+                       tmp += sub.str(3);
+               res = tmp + sub.str(5);
+       }
+       docstring result = from_ascii(res);
+       result = subst(result, from_ascii("%prename%"), prename);
+       result = subst(result, from_ascii("%surname%"), surname);
+       result = subst(result, from_ascii("%prefix%"), prefix);
+       result = subst(result, from_ascii("%suffix%"), suffix);
+       return result;
+}
+
+
+vector<docstring> const getAuthors(docstring const & author)
+{
+       // Splits an author list into its individual names.
+       // We check for groupings (via {...}) and only consider " and "
+       // outside groups as author separator. This is to account
+       // for cases such as {{Barnes and Noble, Inc.}}, which
+       // need to be treated as one single family name. Temporary
+       // placeholders are used to tell the diverse " and " cases apart.
+
+       // First, we temporarily replace all ampersands. It is rather unusual
+       // in author names, but can happen (consider cases such as "C \& A Corp.").
+       docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
+       // Then, we temporarily make all " and " strings to ampersands in order
+       // to handle them later on a per-char level. Note that arbitrary casing
+       // ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
+       static regex const and_reg("(.* )([aA][nN][dD])( .*)");
+       smatch sub;
+       string res = to_utf8(iname);
+       while (regex_match(res, sub, and_reg)) // the first group is greedy: each pass rewrites the right-most match
+               res = sub.str(1) + "&" + sub.str(3);
+       iname = from_utf8(res);
+       // Now we traverse through the string and replace the "&" by the proper
+       // output in- and outside groups
+       docstring name;
+       int gl = 0;
+       docstring::const_iterator p = iname.begin();
+       while (p != iname.end()) {
+               // count grouping level
+               if (*p == '{')
+                       ++gl;
+               else if (*p == '}')
+                       --gl;
+               // generate string with probable placeholders
+               if (*p == '&') {
+                       if (gl > 0)
+                               // Inside groups, we output "and"
+                               name += from_ascii("and");
+                       else
+                               // Outside groups, we output a separator
+                               name += from_ascii("$$namesep!");
+               }
+               else
+                       name += *p;
+               ++p;
+       }
+
+       // re-insert the literal ampersands
+       name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
+
+       // Now construct the actual vector
+       return getVectorFromString(name, from_ascii(" $$namesep! "));
+}
+
+
+bool multipleAuthors(docstring const & author)
+{
+       return getAuthors(author).size() > 1; // true if getAuthors() splits the list into more than one name
 }
 
 
@@ -101,6 +333,7 @@ docstring convertLaTeXCommands(docstring const & str)
 
        bool scanning_cmd = false;
        bool scanning_math = false;
+       bool is_section = false;
        bool escaped = false; // used to catch \$, etc.
        while (!val.empty()) {
                char_type const ch = val[0];
@@ -123,13 +356,24 @@ docstring convertLaTeXCommands(docstring const & str)
                // discard characters until we hit something that
                // isn't alpha.
                if (scanning_cmd) {
+                       if (!is_section && ch == 'S') {
+                               is_section = true;
+                               val = val.substr(1);
+                               continue;
+                       }
                        if (isAlphaASCII(ch)) {
+                               is_section = false;
                                val = val.substr(1);
                                escaped = false;
                                continue;
+                       } else if (is_section) {
+                               ret.push_back(0x00a7);
+                               is_section = false;
+                               continue;
                        }
                        // so we're done with this command.
                        // now we fall through and check this character.
+                       is_section = false;
                        scanning_cmd = false;
                }
 
@@ -146,6 +390,12 @@ docstring convertLaTeXCommands(docstring const & str)
                        continue;
                }
 
+               if (ch == '~') {
+                       ret += char_type(0x00a0);
+                       val = val.substr(1);
+                       continue;
+               }
+
                if (ch == '$') {
                        ret += ch;
                        val = val.substr(1);
@@ -153,7 +403,18 @@ docstring convertLaTeXCommands(docstring const & str)
                        continue;
                }
 
-               // we just ignore braces
+               // Change text mode accents in the form
+               // {\v a} to \v{a} (see #9340).
+               // FIXME: This is a sort of mini-tex2lyx.
+               //        Use the real tex2lyx instead!
+               static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
+               if (regex_search(to_utf8(val), tma_reg)) {
+                       val = val.substr(1);
+                       val.replace(2, 1, from_ascii("{"));
+                       continue;
+               }
+
+               // Apart from the above, we just ignore braces
                if (ch == '{' || ch == '}') {
                        val = val.substr(1);
                        continue;
@@ -174,8 +435,8 @@ docstring convertLaTeXCommands(docstring const & str)
                // look for that and change it, if necessary.
                // FIXME: This is a sort of mini-tex2lyx.
                //        Use the real tex2lyx instead!
-               static lyx::regex const reg("^\\\\\\W\\w");
-               if (lyx::regex_search(to_utf8(val), reg)) {
+               static regex const reg("^\\\\\\W\\w");
+               if (regex_search(to_utf8(val), reg)) {
                        val.insert(3, from_ascii("}"));
                        val.insert(2, from_ascii("{"));
                }
@@ -240,7 +501,7 @@ docstring processRichtext(docstring const & str, bool richtext)
        return ret;
 }
 
-} // anon namespace
+} // namespace
 
 
 //////////////////////////////////////////////////////////////////////
@@ -250,14 +511,31 @@ docstring processRichtext(docstring const & str, bool richtext)
 //////////////////////////////////////////////////////////////////////
 
 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
-       : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
-         modifier_(0)
+       : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
+         info_(), format_(), modifier_(0)
 {}
 
 
-docstring const BibTeXInfo::getAbbreviatedAuthor(
-    Buffer const * buf, bool jurabib_style) const
+
+docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
+                                         bool full, bool forceshort) const
 {
+       docstring author = operator[]("author");
+       if (author.empty())
+               author = operator[]("editor"); // no author given: fall back to the editor field
+
+       return getAuthorList(buf, author, full, forceshort); // formatting is done by getAuthorList()
+}
+
+
+docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
+               docstring const & author, bool const full, bool const forceshort,
+               bool const allnames, bool const beginning) const
+{
+       // The maxnames threshold depends on the engine
+       size_t maxnames = buf ?
+               buf->params().documentClass().max_citenames() : 2;
+
        if (!is_bibtex_) {
                docstring const opt = label();
                if (opt.empty())
@@ -269,45 +547,87 @@ docstring const BibTeXInfo::getAbbreviatedAuthor(
                        // in this case, we didn't find a "(",
                        // so we don't have author (year)
                        return docstring();
+               if (full) {
+                       // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
+                       docstring const fullauthors = trim(rsplit(remainder, ')'));
+                       if (!fullauthors.empty())
+                               return fullauthors;
+               }
                return authors;
        }
 
-       docstring author = operator[]("author");
-       if (author.empty()) {
-               author = operator[]("editor");
-               if (author.empty())
-                       return author;
-       }
+       if (author.empty())
+               return author;
 
-       // FIXME Move this to a separate routine that can
-       // be called from elsewhere.
-       //
        // OK, we've got some names. Let's format them.
-       // Try to split the author list on " and "
-       vector<docstring> const authors =
-               getVectorFromString(author, from_ascii(" and "));
-
-       if (jurabib_style && (authors.size() == 2 || authors.size() == 3)) {
-               docstring shortauthor = familyName(authors[0])
-                       + "/" + familyName(authors[1]);
-               if (authors.size() == 3)
-                       shortauthor += "/" + familyName(authors[2]);
-               return convertLaTeXCommands(shortauthor);
-       }
+       // Try to split the author list
+       vector<docstring> const authors = getAuthors(author);
 
-       docstring retval = familyName(authors[0]);
-
-       if (authors.size() == 2 && authors[1] != "others") {
-               docstring const dformat = buf ? 
-                       buf->B_("%1$s and %2$s") : from_ascii("%1$s and %2$s");
-               retval = bformat(dformat, familyName(authors[0]), familyName(authors[1]));
-       } else if (authors.size() >= 2) {
-               // we get here either if the author list is longer than two names
-               // or if the second 'name' is "others". we do the same thing either
-               // way.
-               docstring const dformat = buf ? 
-                       buf->B_("%1$s et al.") : from_ascii("%1$s et al.");
-               retval = bformat(dformat, familyName(authors[0]));
+       docstring retval;
+
+       CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
+                                              : ENGINE_TYPE_DEFAULT;
+
+       // These are defined in the styles
+       string const etal =
+               buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
+                   : " et al.";
+       string const namesep =
+               buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
+                  : ", ";
+       string const lastnamesep =
+               buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
+                   : ", and ";
+       string const pairnamesep =
+               buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
+                    : " and ";
+       string firstnameform =
+                       buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
+                            : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
+       if (!beginning)
+               firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
+                                            : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
+       string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
+                            : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
+       if (!beginning)
+               othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
+                                            : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
+       string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
+                            : "{%prefix%[[%prefix% ]]}%surname%";
+
+       // Shorten the list (with et al.) if forceshort is set
+       // and the list can actually be shortened, or if the number of
+       // names exceeds maxnames and full is not set.
+       bool shorten = forceshort && authors.size() > 1;
+       vector<docstring>::const_iterator it = authors.begin();
+       vector<docstring>::const_iterator en = authors.end();
+       for (size_t i = 0; it != en; ++it, ++i) {
+               if (i >= maxnames && !full) {
+                       shorten = true;
+                       break;
+               }
+               if (*it == "others") {
+                       retval += buf ? buf->B_(etal) : from_ascii(etal);
+                       break;
+               }
+               if (i > 0 && i == authors.size() - 1) {
+                       if (authors.size() == 2)
+                               retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
+                       else
+                               retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
+               } else if (i > 0)
+                       retval += buf ? buf->B_(namesep) : from_ascii(namesep);
+               if (allnames)
+                       retval += (i == 0) ? constructName(*it, firstnameform)
+                               : constructName(*it, othernameform);
+               else
+                       retval += constructName(*it, citenameform);
+       }
+       if (shorten) {
+               if (allnames)
+                       retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
+               else
+                       retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
        }
 
        return convertLaTeXCommands(retval);
@@ -329,7 +649,9 @@ docstring const BibTeXInfo::getYear() const
                static regex const ereg(".*/[-]?([\\d]{4}).*");
                smatch sm;
                string const date = to_utf8(year);
-               regex_match(date, sm, yreg);
+               if (!regex_match(date, sm, yreg))
+                       // cannot parse year.
+                       return docstring();
                year = from_ascii(sm[1]);
                // check for an endyear
                if (regex_match(date, sm, ereg))
@@ -352,6 +674,75 @@ docstring const BibTeXInfo::getYear() const
 }
 
 
+void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
+{
+       if (is_bibtex_) { // only this case fills doi, url and file; see the note at the end
+               // get "doi" entry from citation record
+               doi = operator[]("doi");
+               if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
+                       doi = "https://doi.org/" + doi;
+               // get "url" entry from citation record
+               url = operator[]("url");
+               // get "file" entry from citation record
+               file = operator[]("file");
+
+               // Jabref case, the field has the format:
+               // Description:Location:Filetype;Description:Location:Filetype...
+               // We will grab only the first pdf
+               if (!file.empty()) {
+                       docstring ret, filedest, tmp;
+                       ret = split(file, tmp, ':');
+                       tmp = split(ret, filedest, ':');
+                       // TODO: how to deal with relative directories?
+                       FileName f(to_utf8(filedest));
+                       if (f.exists())
+                               file = "file:///" + filedest;
+               }
+
+               // kbibtex case, the format is:
+               // file1.pdf;file2.pdf
+               // We will grab only the first pdf
+               docstring kfile;
+               if (file.empty())
+                       kfile = operator[]("localfile");
+               if (!kfile.empty()) {
+                       docstring filedest, tmp;
+                       tmp = split(kfile, filedest, ';');
+                       // TODO: how to deal with relative directories?
+                       FileName f(to_utf8(filedest));
+                       if (f.exists())
+                               file = "file:///" + filedest;
+               }
+
+               if (!url.empty())
+                       return;
+
+               // No url given: try the biblatex specific fields, see its manual
+               // 3.13.7 "Electronic Publishing Information"
+               docstring eprinttype = operator[]("eprinttype");
+               docstring eprint = operator[]("eprint");
+               if (eprint.empty())
+                       return;
+
+               if (eprinttype == "arxiv")
+                       url = "https://arxiv.org/abs/" + eprint;
+               if (eprinttype == "jstor")
+                       url = "https://www.jstor.org/stable/" + eprint;
+               if (eprinttype == "pubmed")
+                       url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
+               if (eprinttype == "hdl")
+                       url = "https://hdl.handle.net/" + eprint;
+               if (eprinttype == "googlebooks")
+                       url = "http://books.google.com/books?id=" + eprint;
+
+               return;
+       }
+
+       // The bibliography environment could be handled here. All one could do
+       // here is let LyX scan the entry for URL or HRef insets.
+}
+
+
 namespace {
 
 docstring parseOptions(docstring const & format, string & optkey,
@@ -465,11 +856,11 @@ docstring parseOptions(docstring const & format, string & optkey,
 }
 
 
-} // anon namespace
+} // namespace
 
 /* FIXME
 Bug #9131 revealed an oddity in how we are generating citation information
-when more than one key is given. We end up building a longer and longer format 
+when more than one key is given. We end up building a longer and longer format
 string as we go, which we then have to re-parse, over and over and over again,
 rather than generating the information for the individual keys and then putting
 all of that together. We do that to deal with the way separators work, from what
@@ -477,16 +868,17 @@ I can tell, but it still feels like a hack. Fixing this would require quite a
 bit of work, however.
 */
 docstring BibTeXInfo::expandFormat(docstring const & format,
-               BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
-               docstring before, docstring after, docstring dialog, bool next) const
+               BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
+               CiteItem const & ci, bool next, bool second) const
 {
        // incorrect use of macros could put us in an infinite loop
        static int const max_passes = 5000;
        // the use of overly large keys can lead to performance problems, due
        // to eventual attempts to convert LaTeX macros to unicode. See bug
-       // #8944. This is perhaps not the best solution, but it will have to
-       // do for now.
-       static size_t const max_keysize = 128;
+       // #8944. By default, the size is limited to 128 (in CiteItem), but
+       // for specific purposes (such as XHTML export), it needs to be enlarged.
+       // This is perhaps not the best solution, but it will have to do for now.
+       size_t const max_keysize = ci.max_key_size;
        odocstringstream ret; // return value
        string key;
        bool scanning_key = false;
@@ -517,16 +909,23 @@ docstring BibTeXInfo::expandFormat(docstring const & format,
                                        fmt = from_utf8(val) + fmt.substr(1);
                                        counter += 1;
                                        continue;
-                               } else if (key[0] == '_') {
-                                       // a translatable bit
+                               } else if (prefixIs(key, "B_")) {
+                                       // a translatable bit (to the Buffer language)
                                        string const val =
                                                buf.params().documentClass().getCiteMacro(engine_type, key);
                                        docstring const trans =
                                                translateIfPossible(from_utf8(val), buf.params().language->code());
                                        ret << trans;
+                               } else if (key[0] == '_') {
+                                       // a translatable bit (to the GUI language)
+                                       string const val =
+                                               buf.params().documentClass().getCiteMacro(engine_type, key);
+                                       docstring const trans =
+                                               translateIfPossible(from_utf8(val));
+                                       ret << trans;
                                } else {
                                        docstring const val =
-                                               getValueForKey(key, buf, before, after, dialog, xrefs, max_keysize);
+                                               getValueForKey(key, buf, ci, xrefs, max_keysize);
                                        if (!scanning_rich)
                                                ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
                                        ret << val;
@@ -557,17 +956,21 @@ docstring BibTeXInfo::expandFormat(docstring const & format,
                                                return _("ERROR!");
                                        fmt = newfmt;
                                        docstring const val =
-                                               getValueForKey(optkey, buf, before, after, dialog, xrefs);
+                                               getValueForKey(optkey, buf, ci, xrefs);
                                        if (optkey == "next" && next)
                                                ret << ifpart; // without expansion
-                                       else if (!val.empty()) {
+                                       else if (optkey == "second" && second) {
+                                               int newcounter = 0;
+                                               ret << expandFormat(ifpart, xrefs, newcounter, buf,
+                                                       ci, next);
+                                       } else if (!val.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(ifpart, xrefs, newcounter, buf,
-                                                       before, after, dialog, next);
+                                                       ci, next);
                                        } else if (!elsepart.empty()) {
                                                int newcounter = 0;
                                                ret << expandFormat(elsepart, xrefs, newcounter, buf,
-                                                       before, after, dialog, next);
+                                                       ci, next);
                                        }
                                        // fmt will have been shortened for us already
                                        continue;
@@ -615,11 +1018,28 @@ docstring BibTeXInfo::expandFormat(docstring const & format,
 }
 
 
-docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
-       Buffer const & buf, bool richtext) const
+docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
+       Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
 {
-       if (!richtext && !info_.empty())
+       bool const richtext = ci.richtext;
+
+       CiteEngineType const engine_type = buf.params().citeEngineType();
+       DocumentClass const & dc = buf.params().documentClass();
+       docstring const & format = format_in.empty()? 
+                               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
+                             : format_in;
+
+       if (format != format_) {
+               // clear caches since format changed
+               info_.clear();
+               info_richtext_.clear();
+               format_ = format;
+       }
+
+       if (!richtext && !info_.empty()) {
+               info_ = convertLaTeXCommands(processRichtext(info_, false));
                return info_;
+       }
        if (richtext && !info_richtext_.empty())
                return info_richtext_;
 
@@ -629,13 +1049,9 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
                return info_;
        }
 
-       CiteEngineType const engine_type = buf.params().citeEngineType();
-       DocumentClass const & dc = buf.params().documentClass();
-       docstring const & format =
-               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
        int counter = 0;
        info_ = expandFormat(format, xrefs, counter, buf,
-               docstring(), docstring(), docstring(), false);
+               ci, false, false);
 
        if (info_.empty()) {
                // this probably shouldn't happen
@@ -652,19 +1068,17 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
 }
 
 
-docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
-       Buffer const & buf, docstring const & format, bool richtext,
-       docstring const & before, docstring const & after, 
-       docstring const & dialog, bool next) const
+docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
+       Buffer const & buf, docstring const & format,
+       CiteItem const & ci, bool next, bool second) const
 {
        docstring loclabel;
 
        int counter = 0;
-       loclabel = expandFormat(format, xrefs, counter, buf,
-               before, after, dialog, next);
+       loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
 
        if (!loclabel.empty() && !next) {
-               loclabel = processRichtext(loclabel, richtext);
+               loclabel = processRichtext(loclabel, ci.richtext);
                loclabel = convertLaTeXCommands(loclabel);
        }
 
@@ -689,8 +1103,7 @@ docstring const & BibTeXInfo::operator[](string const & field) const
 
 
 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
-       docstring const & before, docstring const & after, docstring const & dialog,
-       BibTeXInfoList const xrefs, size_t maxsize) const
+       CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
 {
        // anything less is pointless
        LASSERT(maxsize >= 16, maxsize = 16);
@@ -703,11 +1116,10 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 
        docstring ret = operator[](key);
        if (ret.empty() && !xrefs.empty()) {
-               vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
-               vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
-               for (; it != en; ++it) {
-                       if (*it && !(**it)[key].empty()) {
-                               ret = (**it)[key];
+               // xr is a (reference to a) BibTeXInfo const *
+               for (auto const & xr : xrefs) {
+                       if (xr && !(*xr)[key].empty()) {
+                               ret = (*xr)[key];
                                break;
                        }
                }
@@ -715,10 +1127,19 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
        if (ret.empty()) {
                // some special keys
                // FIXME: dialog, textbefore and textafter have nothing to do with this
-               if (key == "dialog")
-                       ret = dialog;
+               if (key == "dialog" && ci.context == CiteItem::Dialog)
+                       ret = from_ascii("x"); // any non-empty string will do
+               else if (key == "export" && ci.context == CiteItem::Export)
+                       ret = from_ascii("x"); // any non-empty string will do
+               else if (key == "ifstar" && ci.Starred)
+                       ret = from_ascii("x"); // any non-empty string will do
+               else if (key == "ifqualified" && ci.isQualified)
+                       ret = from_ascii("x"); // any non-empty string will do
                else if (key == "entrytype")
                        ret = entry_type_;
+               else if (prefixIs(key, "ifentrytype:")
+                        && from_ascii(key.substr(12)) == entry_type_)
+                       ret = from_ascii("x"); // any non-empty string will do
                else if (key == "key")
                        ret = bib_key_;
                else if (key == "label")
@@ -727,42 +1148,121 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        ret = modifier_;
                else if (key == "numericallabel")
                        ret = cite_number_;
-               else if (key == "abbrvauthor")
-                       // Special key to provide abbreviated author names.
-                       ret = getAbbreviatedAuthor(&buf, false);
-               else if (key == "shortauthor")
-                       // When shortauthor is not defined, jurabib automatically
-                       // provides jurabib-style abbreviated author names. We do
-                       // this as well.
-                       ret = getAbbreviatedAuthor(&buf, true);
-               else if (key == "shorttitle") {
-                       // When shorttitle is not defined, jurabib uses for `article'
-                       // and `periodical' entries the form `journal volume [year]'
-                       // and for other types of entries it uses the `title' field.
-                       if (entry_type_ == "article" || entry_type_ == "periodical")
-                               ret = operator[]("journal") + " " + operator[]("volume")
-                                       + " [" + operator[]("year") + "]";
-                       else
-                               ret = operator[]("title");
+               else if (prefixIs(key, "ifmultiple:")) {
+                       // Return whether we have multiple authors
+                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       if (multipleAuthors(kind))
+                               ret = from_ascii("x"); // any non-empty string will do
+               }
+               else if (prefixIs(key, "abbrvnames:")) {
+                       // Special key to provide abbreviated name list,
+                       // with respect to maxcitenames. Suitable for Bibliography
+                       // beginnings.
+                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       ret = getAuthorList(&buf, kind, false, false, true);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (prefixIs(key, "fullnames:")) {
+                       // Return a full name list. Suitable for Bibliography
+                       // beginnings.
+                       docstring const kind = operator[](from_ascii(key.substr(10)));
+                       ret = getAuthorList(&buf, kind, true, false, true);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (prefixIs(key, "forceabbrvnames:")) {
+                       // Special key to provide abbreviated name lists,
+                       // irrespective of maxcitenames. Suitable for Bibliography
+                       // beginnings. The field name follows the 16-char prefix.
+                       docstring const kind = operator[](from_ascii(key.substr(16)));
+                       ret = getAuthorList(&buf, kind, false, true, true);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (prefixIs(key, "abbrvbynames:")) {
+                       // Special key to provide abbreviated name list,
+                       // with respect to maxcitenames. Suitable for further names inside a
+                       // bibliography item (such as "ed. by ..."); field name follows the 13-char prefix.
+                       docstring const kind = operator[](from_ascii(key.substr(13)));
+                       ret = getAuthorList(&buf, kind, false, false, true, false);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (prefixIs(key, "fullbynames:")) {
+                       // Return a full name list. Suitable for further names inside a
+                       // bibliography item (such as "ed. by ..."); field name follows the 12-char prefix.
+                       docstring const kind = operator[](from_ascii(key.substr(12)));
+                       ret = getAuthorList(&buf, kind, true, false, true, false);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (prefixIs(key, "forceabbrvbynames:")) {
+                       // Special key to provide abbreviated name lists,
+                       // irrespective of maxcitenames. Suitable for further names inside a
+                       // bibliography item (such as "ed. by ..."); field name follows the 18-char prefix.
+                       docstring const kind = operator[](from_ascii(key.substr(18)));
+                       ret = getAuthorList(&buf, kind, false, true, true, false);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (key == "abbrvciteauthor") {
+                       // Special key to provide abbreviated author or
+                       // editor names (suitable for citation labels),
+                       // with respect to maxcitenames.
+                       ret = getAuthorOrEditorList(&buf, false, false);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (key == "fullciteauthor") {
+                       // Return a full author or editor list (for citation labels)
+                       ret = getAuthorOrEditorList(&buf, true, false);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
+               } else if (key == "forceabbrvciteauthor") {
+                       // Special key to provide abbreviated author or
+                       // editor names (suitable for citation labels),
+                       // irrespective of maxcitenames.
+                       ret = getAuthorOrEditorList(&buf, false, true);
+                       if (ci.forceUpperCase && isLowerCase(ret[0]))
+                               ret[0] = uppercase(ret[0]);
                } else if (key == "bibentry") {
                        // Special key to provide the full bibliography entry: see getInfo()
                        CiteEngineType const engine_type = buf.params().citeEngineType();
                        DocumentClass const & dc = buf.params().documentClass();
                        docstring const & format =
-                               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
+                               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
                        int counter = 0;
-                       ret = expandFormat(format, xrefs, counter, buf,
-                               docstring(), docstring(), docstring(), false);
+                       ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
                } else if (key == "textbefore")
-                       ret = before;
+                       ret = ci.textBefore;
                else if (key == "textafter")
-                       ret = after;
-               else if (key == "year")
+                       ret = ci.textAfter;
+               else if (key == "curpretext") {
+                       vector<pair<docstring, docstring>> pres = ci.getPretexts();
+                       vector<pair<docstring, docstring>>::iterator it = pres.begin();
+                       int numkey = 1;
+                       for (; it != pres.end() ; ++it) {
+                               if ((*it).first == bib_key_ && numkey == num_bib_key_) {
+                                       ret = (*it).second;
+                                       pres.erase(it);
+                                       break;
+                               }
+                               if ((*it).first == bib_key_)
+                                       ++numkey;
+                       }
+               } else if (key == "curposttext") {
+                       vector<pair<docstring, docstring>> posts = ci.getPosttexts();
+                       vector<pair<docstring, docstring>>::iterator it = posts.begin();
+                       int numkey = 1;
+                       for (; it != posts.end() ; ++it) {
+                               if ((*it).first == bib_key_ && numkey == num_bib_key_) {
+                                       ret = (*it).second;
+                                       posts.erase(it);
+                                       break;
+                               }
+                               if ((*it).first == bib_key_)
+                                       ++numkey;
+                       }
+               } else if (key == "year")
                        ret = getYear();
        }
 
        if (cleanit)
-               ret = html::cleanAttr(ret);
+               ret = xml::cleanAttr(ret);
 
        // make sure it is not too big
        support::truncateWithEllipsis(ret, maxsize);
@@ -779,15 +1279,11 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 namespace {
 
 // A functor for use with sort, leading to case insensitive sorting
-class compareNoCase: public binary_function<docstring, docstring, bool>
-{
-public:
-       bool operator()(docstring const & s1, docstring const & s2) const {
-               return compare_no_case(s1, s2) < 0;
-       }
-};
+bool compareNoCase(const docstring & a, const docstring & b) {
+       return compare_no_case(a, b) < 0;
+}
 
-} // namespace anon
+} // namespace
 
 
 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
@@ -812,10 +1308,7 @@ vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const
        // XData field can consist of a comma-separated list of keys
        vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
        if (!xdatakeys.empty()) {
-               vector<docstring>::const_iterator xit = xdatakeys.begin();
-               vector<docstring>::const_iterator xen = xdatakeys.end();
-               for (; xit != xen; ++xit) {
-                       docstring const xdatakey = *xit;
+               for (auto const & xdatakey : xdatakeys) {
                        result.push_back(xdatakey);
                        BiblioInfo::const_iterator it = find(xdatakey);
                        if (it != end()) {
@@ -833,10 +1326,9 @@ vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const
 vector<docstring> const BiblioInfo::getKeys() const
 {
        vector<docstring> bibkeys;
-       BiblioInfo::const_iterator it  = begin();
-       for (; it != end(); ++it)
-               bibkeys.push_back(it->first);
-       sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
+       for (auto const & bi : *this)
+               bibkeys.push_back(bi.first);
+       sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
        return bibkeys;
 }
 
@@ -844,10 +1336,8 @@ vector<docstring> const BiblioInfo::getKeys() const
 vector<docstring> const BiblioInfo::getFields() const
 {
        vector<docstring> bibfields;
-       set<docstring>::const_iterator it = field_names_.begin();
-       set<docstring>::const_iterator end = field_names_.end();
-       for (; it != end; ++it)
-               bibfields.push_back(*it);
+       for (auto const & fn : field_names_)
+               bibfields.push_back(fn);
        sort(bibfields.begin(), bibfields.end());
        return bibfields;
 }
@@ -856,22 +1346,20 @@ vector<docstring> const BiblioInfo::getFields() const
 vector<docstring> const BiblioInfo::getEntries() const
 {
        vector<docstring> bibentries;
-       set<docstring>::const_iterator it = entry_types_.begin();
-       set<docstring>::const_iterator end = entry_types_.end();
-       for (; it != end; ++it)
-               bibentries.push_back(*it);
+       for (auto const & et : entry_types_)
+               bibentries.push_back(et);
        sort(bibentries.begin(), bibentries.end());
        return bibentries;
 }
 
 
-docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key, Buffer const & buf) const
+docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
 {
        BiblioInfo::const_iterator it = find(key);
        if (it == end())
                return docstring();
        BibTeXInfo const & data = it->second;
-       return data.getAbbreviatedAuthor(&buf, false);
+       return data.getAuthorOrEditorList(&buf, false);
 }
 
 
@@ -884,6 +1372,15 @@ docstring const BiblioInfo::getCiteNumber(docstring const & key) const
        return data.citeNumber();
 }
 
+void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
+{
+       BiblioInfo::const_iterator it = find(key);
+        if (it == end())
+               return;
+       BibTeXInfo const & data = it->second;
+       data.getLocators(doi,url,file);
+}
+
 
 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 {
@@ -898,10 +1395,8 @@ docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) co
                if (xrefs.empty())
                        // no luck
                        return docstring();
-               vector<docstring>::const_iterator it = xrefs.begin();
-               vector<docstring>::const_iterator en = xrefs.end();
-               for (; it != en; ++it) {
-                       BiblioInfo::const_iterator const xrefit = find(*it);
+               for (docstring const & xref : xrefs) {
+                       BiblioInfo::const_iterator const xrefit = find(xref);
                        if (xrefit == end())
                                continue;
                        BibTeXInfo const & xref_data = xrefit->second;
@@ -927,47 +1422,54 @@ docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, b
 
 
 docstring const BiblioInfo::getInfo(docstring const & key,
-       Buffer const & buf, bool richtext) const
+       Buffer const & buf, CiteItem const & ci, docstring const & format) const
 {
        BiblioInfo::const_iterator it = find(key);
        if (it == end())
-               return docstring(_("Bibliography entry not found!"));
+               return _("Bibliography entry not found!");
        BibTeXInfo const & data = it->second;
        BibTeXInfoList xrefptrs;
-       vector<docstring> const xrefs = getXRefs(data);
-       if (!xrefs.empty()) {
-               vector<docstring>::const_iterator it = xrefs.begin();
-               vector<docstring>::const_iterator en = xrefs.end();
-               for (; it != en; ++it) {
-                       BiblioInfo::const_iterator const xrefit = find(*it);
-                       if (xrefit != end())
-                               xrefptrs.push_back(&(xrefit->second));
-               }
+       for (docstring const & xref : getXRefs(data)) {
+               BiblioInfo::const_iterator const xrefit = find(xref);
+               if (xrefit != end())
+                       xrefptrs.push_back(&(xrefit->second));
        }
-       return data.getInfo(xrefptrs, buf, richtext);
+       return data.getInfo(xrefptrs, buf, ci, format);
 }
 
 
 docstring const BiblioInfo::getLabel(vector<docstring> keys,
-       Buffer const & buf, string const & style, bool for_xhtml,
-       size_t max_size, docstring const & before, docstring const & after,
-       docstring const & dialog) const
+       Buffer const & buf, string const & style, CiteItem const & ci) const
 {
+       size_t max_size = ci.max_size;
        // shorter makes no sense
        LASSERT(max_size >= 16, max_size = 16);
 
        // we can't display more than 10 of these, anyway
+       // but since we truncate in the middle,
+       // we need to split into two halves.
        bool const too_many_keys = keys.size() > 10;
-       if (too_many_keys)
-               keys.resize(10);
+       vector<docstring> lkeys;
+       if (too_many_keys) {
+               lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
+               keys.resize(5);
+               keys.insert(keys.end(), lkeys.begin(), lkeys.end());
+       }
 
        CiteEngineType const engine_type = buf.params().citeEngineType();
        DocumentClass const & dc = buf.params().documentClass();
-       docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, "cite"));
+       docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
        docstring ret = format;
        vector<docstring>::const_iterator key = keys.begin();
        vector<docstring>::const_iterator ken = keys.end();
-       for (; key != ken; ++key) {
+       vector<docstring> handled_keys;
+       for (int i = 0; key != ken; ++key, ++i) {
+               handled_keys.push_back(*key);
+               int n = 0;
+               for (auto const & k : handled_keys) {
+                       if (k == *key)
+                               ++n;
+               }
                BiblioInfo::const_iterator it = find(*key);
                BibTeXInfo empty_data;
                empty_data.key(*key);
@@ -975,24 +1477,18 @@ docstring const BiblioInfo::getLabel(vector<docstring> keys,
                vector<BibTeXInfo const *> xrefptrs;
                if (it != end()) {
                        data = it->second;
-                       vector<docstring> const xrefs = getXRefs(data);
-                       if (!xrefs.empty()) {
-                               vector<docstring>::const_iterator it = xrefs.begin();
-                               vector<docstring>::const_iterator en = xrefs.end();
-                               for (; it != en; ++it) {
-                                       BiblioInfo::const_iterator const xrefit = find(*it);
-                                       if (xrefit != end())
-                                               xrefptrs.push_back(&(xrefit->second));
-                               }
+                       for (docstring const & xref : getXRefs(data)) {
+                               BiblioInfo::const_iterator const xrefit = find(xref);
+                               if (xrefit != end())
+                                       xrefptrs.push_back(&(xrefit->second));
                        }
                }
-               ret = data.getLabel(xrefptrs, buf, ret, for_xhtml,
-                       before, after, dialog, key + 1 != ken);
+               data.numKey(n);
+               ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
        }
 
-       if (too_many_keys)
-               ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
-       support::truncateWithEllipsis(ret, max_size);
+       support::truncateWithEllipsis(ret, max_size, true);
+
        return ret;
 }
 
@@ -1008,22 +1504,21 @@ bool BiblioInfo::isBibtex(docstring const & key) const
 }
 
 
-vector<docstring> const BiblioInfo::getCiteStrings(
+BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
        vector<docstring> const & keys, vector<CitationStyle> const & styles,
-       Buffer const & buf, docstring const & before,
-       docstring const & after, docstring const & dialog, size_t max_size) const
+       Buffer const & buf, CiteItem const & ci) const
 {
        if (empty())
-               return vector<docstring>();
+               return CiteStringMap();
 
        string style;
-       vector<docstring> vec(styles.size());
-       for (size_t i = 0; i != vec.size(); ++i) {
+       CiteStringMap csm(styles.size());
+       for (size_t i = 0; i != csm.size(); ++i) {
                style = styles[i].name;
-               vec[i] = getLabel(keys, buf, style, false, max_size, before, after, dialog);
+               csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
        }
 
-       return vec;
+       return csm;
 }
 
 
@@ -1040,8 +1535,8 @@ namespace {
 // used in xhtml to sort a list of BibTeXInfo objects
 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
 {
-       docstring const lauth = lhs->getAbbreviatedAuthor();
-       docstring const rauth = rhs->getAbbreviatedAuthor();
+       docstring const lauth = lhs->getAuthorOrEditorList();
+       docstring const rauth = rhs->getAuthorOrEditorList();
        docstring const lyear = lhs->getYear();
        docstring const ryear = rhs->getYear();
        docstring const ltitl = lhs->operator[]("title");
@@ -1051,7 +1546,7 @@ bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
                || (lauth == rauth && lyear == ryear && ltitl < rtitl);
 }
 
-}
+} // namespace
 
 
 void BiblioInfo::collectCitedEntries(Buffer const & buf)
@@ -1062,13 +1557,11 @@ void BiblioInfo::collectCitedEntries(Buffer const & buf)
        // FIXME We may want to collect these differently, in the first case,
        // so that we might have them in order of appearance.
        set<docstring> citekeys;
-       shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
-       Toc::const_iterator it = toc->begin();
-       Toc::const_iterator const en = toc->end();
-       for (; it != en; ++it) {
-               if (it->str().empty())
+       Toc const & toc = *buf.tocBackend().toc("citation");
+       for (auto const & t : toc) {
+               if (t.str().empty())
                        continue;
-               vector<docstring> const keys = getVectorFromString(it->str());
+               vector<docstring> const keys = getVectorFromString(t.str());
                citekeys.insert(keys.begin(), keys.end());
        }
        if (citekeys.empty())
@@ -1078,10 +1571,8 @@ void BiblioInfo::collectCitedEntries(Buffer const & buf)
        // We will now convert it to a list of the BibTeXInfo objects used in
        // this document...
        vector<BibTeXInfo const *> bi;
-       set<docstring>::const_iterator cit = citekeys.begin();
-       set<docstring>::const_iterator const cen = citekeys.end();
-       for (; cit != cen; ++cit) {
-               BiblioInfo::const_iterator const bt = find(*cit);
+       for (auto const & ck : citekeys) {
+               BiblioInfo::const_iterator const bt = find(ck);
                if (bt == end() || !bt->second.isBibTeX())
                        continue;
                bi.push_back(&(bt->second));
@@ -1090,10 +1581,9 @@ void BiblioInfo::collectCitedEntries(Buffer const & buf)
        sort(bi.begin(), bi.end(), lSorter);
 
        // Now we can write the sorted keys
-       vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
-       vector<BibTeXInfo const *>::const_iterator ben = bi.end();
-       for (; bit != ben; ++bit)
-               cited_entries_.push_back((*bit)->key());
+       // b is a BibTeXInfo const *
+       for (auto const & b : bi)
+               cited_entries_.push_back(b->key());
 }
 
 
@@ -1108,12 +1598,11 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf)
        // used to remember the last one we saw
        // we'll be comparing entries to see if we need to add
        // modifiers, like "1984a"
-       map<docstring, BibTeXInfo>::iterator last;
+       map<docstring, BibTeXInfo>::iterator last = bimap_.end();
 
-       vector<docstring>::const_iterator it = cited_entries_.begin();
-       vector<docstring>::const_iterator const en = cited_entries_.end();
-       for (; it != en; ++it) {
-               map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
+       // add letters to years
+       for (auto const & ce : cited_entries_) {
+               map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
                // this shouldn't happen, but...
                if (biit == bimap_.end())
                        // ...fail gracefully, anyway.
@@ -1123,13 +1612,11 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf)
                        docstring const num = convert<docstring>(++keynumber);
                        entry.setCiteNumber(num);
                } else {
-                       // coverity complains about our derefercing the iterator last,
-                       // which was not initialized above. but it does get initialized
-                       // after the first time through the loop, which is the point of
-                       // the first test.
-                       // coverity[FORWARD_NULL]
-                       if (it != cited_entries_.begin()
-                           && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
+                       // The first test here is checking whether this is the first
+                       // time through the loop. If so, then we do not have anything
+                       // with which to compare.
+                       if (last != bimap_.end()
+                           && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
                            // we access the year via getYear() so as to get it from the xref,
                            // if we need to do so
                            && getYear(entry.key()) == getYear(last->second.key())) {
@@ -1150,9 +1637,8 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf)
                }
        }
        // Set the labels
-       it = cited_entries_.begin();
-       for (; it != en; ++it) {
-               map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
+       for (auto const & ce : cited_entries_) {
+               map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
                // this shouldn't happen, but...
                if (biit == bimap_.end())
                        // ...fail gracefully, anyway.
@@ -1161,10 +1647,10 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf)
                if (numbers) {
                        entry.label(entry.citeNumber());
                } else {
-                       docstring const auth = entry.getAbbreviatedAuthor(&buf, false);
+                       docstring const auth = entry.getAuthorOrEditorList(&buf, false);
                        // we do it this way so as to access the xref, if necessary
                        // note that this also gives us the modifier
-                       docstring const year = getYear(*it, buf, true);
+                       docstring const year = getYear(ce, buf, true);
                        if (!auth.empty() && !year.empty())
                                entry.label(auth + ' ' + year);
                        else
@@ -1217,4 +1703,79 @@ string citationStyleToString(const CitationStyle & cs, bool const latex)
        return cmd;
 }
 
+
+// Write the authors in authorsString to xs as a DocBook <authorgroup>, with one
+// <author><personname> element per author. This closely mimics getAuthorList,
+// but the complete list of authors is always produced and no separators are
+// required, as the output has a database-like shape. constructName has been
+// merged within. A name equal to "others" is written as the "_etal" cite macro
+// of the document class, passed through buf.B_(). The text goes to xs; the
+// returned docstring is always empty.
+docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
+{
+       if (authorsString.empty())
+               return docstring();
+
+       // Split the input list of authors into individual authors.
+       vector<docstring> const authors = getAuthors(authorsString);
+       // Never open an <authorgroup> that would stay empty.
+       if (authors.empty())
+               return docstring();
+
+       // Retrieve the "et al." variation.
+       string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
+
+       // Output the list of authors.
+       xs << xml::StartTag("authorgroup");
+       xs << xml::CR();
+
+       for (docstring const & name : authors) {
+               xs << xml::StartTag("author");
+               xs << xml::CR();
+               xs << xml::StartTag("personname");
+               xs << xml::CR();
+               // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
+               if (name == "others") {
+                       xs << buf.B_(etal);
+               } else {
+                       name_parts const parts = nameParts(name);
+                       if (! parts.prefix.empty()) {
+                               xs << xml::StartTag("honorific");
+                               xs << parts.prefix;
+                               xs << xml::EndTag("honorific");
+                               xs << xml::CR();
+                       }
+                       if (! parts.prename.empty()) {
+                               xs << xml::StartTag("firstname");
+                               xs << parts.prename;
+                               xs << xml::EndTag("firstname");
+                               xs << xml::CR();
+                       }
+                       if (! parts.surname.empty()) {
+                               xs << xml::StartTag("surname");
+                               xs << parts.surname;
+                               xs << xml::EndTag("surname");
+                               xs << xml::CR();
+                       }
+                       if (! parts.suffix.empty()) {
+                               xs << xml::StartTag("othername", "role=\"suffix\"");
+                               xs << parts.suffix;
+                               xs << xml::EndTag("othername");
+                               xs << xml::CR();
+                       }
+               }
+
+               xs << xml::EndTag("personname");
+               xs << xml::CR();
+               xs << xml::EndTag("author");
+               xs << xml::CR();
+
+               // Could add an affiliation after <personname>, but not stored in BibTeX.
+       }
+       xs << xml::EndTag("authorgroup");
+       xs << xml::CR();
+
+       return docstring();
+}
+
 } // namespace lyx