X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FBiblioInfo.cpp;h=a9f48e44577ed28eea1a66389b0699ad0b7c63d2;hb=28be7d552f62cc02fa86d7f79201d089bfb2d7b5;hp=ab06f37bf85e879e146b8fa71cbeac51d3e1cbce;hpb=a751c5b846a836d9f1096052e281adae533c647c;p=lyx.git diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp index ab06f37bf8..a9f48e4457 100644 --- a/src/BiblioInfo.cpp +++ b/src/BiblioInfo.cpp @@ -36,6 +36,7 @@ #include "support/regex.h" #include "support/textutils.h" +#include #include using namespace std; @@ -46,40 +47,155 @@ namespace lyx { namespace { -// gets the "prename" and "family name" from an author-type string -pair nameParts(docstring const & name) +// Remove placeholders from names +docstring renormalize(docstring const & input) { - if (name.empty()) - return make_pair(docstring(), docstring()); + docstring res = subst(input, from_ascii("$$space!"), from_ascii(" ")); + return subst(res, from_ascii("$$comma!"), from_ascii(",")); +} + + +// Split the surname into prefix ("von-part") and family name +pair parseSurname(docstring const & sname) +{ + // Split the surname into its tokens + vector pieces = getVectorFromString(sname, from_ascii(" ")); + if (pieces.size() < 2) + return make_pair(docstring(), sname); + + // Now we look for pieces that begin with a lower case letter. + // All except for the very last token constitute the "von-part". + docstring prefix; + vector::const_iterator it = pieces.begin(); + vector::const_iterator const en = pieces.end(); + bool first = true; + for (; it != en; ++it) { + if ((*it).empty()) + continue; + // If this is the last piece, then what we now have is + // the family name, notwithstanding the casing. + if (it + 1 == en) + break; + char_type const c = (*it)[0]; + // If the piece starts with a upper case char, we assume + // this is part of the surname. + if (!isLower(c)) + break; + // Nothing of the former, so add this piece to the prename + if (!first) + prefix += " "; + else + first = false; + prefix += *it; + } + + // Reconstruct the family name. + // Note that if we left the loop with because it + 1 == en, + // then this will still do the right thing, i.e., make surname + // just be the last piece. + docstring surname; + first = true; + for (; it != en; ++it) { + if (!first) + surname += " "; + else + first = false; + surname += *it; + } + return make_pair(prefix, surname); +} + + +struct name_parts { + docstring surname; + docstring prename; + docstring suffix; + docstring prefix; +}; - // first we look for a comma, and take the last name to be everything - // preceding the right-most one, so that we also get the "jr" part. + +// gets the name parts (prename, surname, prefix, suffix) from an author-type string +name_parts nameParts(docstring const & iname) +{ + name_parts res; + if (iname.empty()) + return res; + + // First we check for goupings (via {...}) and replace blanks and + // commas inside groups with temporary placeholders + docstring name; + int gl = 0; + docstring::const_iterator p = iname.begin(); + while (p != iname.end()) { + // count grouping level + if (*p == '{') + ++gl; + else if (*p == '}') + --gl; + // generate string with probable placeholders + if (*p == ' ' && gl > 0) + name += from_ascii("$$space!"); + else if (*p == ',' && gl > 0) + name += from_ascii("$$comma!"); + else + name += *p; + ++p; + } + + // Now we look for a comma, and take the last name to be everything + // preceding the right-most one, so that we also get the name suffix + // (aka "jr" part). vector pieces = getVectorFromString(name); - if (pieces.size() > 1) - // whether we have a jr. part or not, it's always - // the first and last item (reversed) - return make_pair(pieces.back(), pieces.front()); + if (pieces.size() > 1) { + // Whether we have a name suffix or not, the prename is + // always last item + res.prename = renormalize(pieces.back()); + // The family name, conversely, is always the first item. + // However, it might contain a prefix (aka "von" part) + docstring const sname = pieces.front(); + res.prefix = renormalize(parseSurname(sname).first); + res.surname = renormalize(parseSurname(sname).second); + // If we have three pieces (the maximum allowed by BibTeX), + // the second one is the name suffix. + if (pieces.size() > 2) + res.suffix = renormalize(pieces.at(1)); + return res; + } - // OK, so now we want to look for the last name. We're going to - // include the "von" part. This isn't perfect. + // OK, so now we want to look for the last name. // Split on spaces, to get various tokens. pieces = getVectorFromString(name, from_ascii(" ")); - // If we only get two, assume the last one is the last name - if (pieces.size() <= 2) - return make_pair(pieces.front(), pieces.back()); + // No space: Only a family name given + if (pieces.size() < 2) { + res.surname = renormalize(pieces.back()); + return res; + } + // If we get two pieces, assume "prename surname" + if (pieces.size() == 2) { + res.prename = renormalize(pieces.front()); + res.surname = renormalize(pieces.back()); + return res; + } - // Now we look for the first token that begins with - // a lower case letter or an opening group {. + // More than 3 pieces: A name prefix (aka "von" part) might be included. + // We look for the first piece that begins with a lower case letter + // (which is the name prefix, if it is not the last token) or the last token. docstring prename; vector::const_iterator it = pieces.begin(); - vector::const_iterator en = pieces.end(); + vector::const_iterator const en = pieces.end(); bool first = true; for (; it != en; ++it) { if ((*it).empty()) continue; char_type const c = (*it)[0]; - if (isLower(c) || c == '{') + // If the piece starts with a lower case char, we assume + // this is the name prefix and thus prename is complete. + if (isLower(c)) break; + // Same if this is the last piece, which is always the surname. + if (it + 1 == en) + break; + // Nothing of the former, so add this piece to the prename if (!first) prename += " "; else @@ -87,10 +203,10 @@ pair nameParts(docstring const & name) prename += *it; } - if (it == en) // we never found a "von" or group - return make_pair(prename, pieces.back()); - - // reconstruct the family name + // Now reconstruct the family name and strip the prefix. + // Note that if we left the loop because it + 1 == en, + // then this will still do the right thing, i.e., make surname + // just be the last piece. docstring surname; first = true; for (; it != en; ++it) { @@ -100,7 +216,10 @@ pair nameParts(docstring const & name) first = false; surname += *it; } - return make_pair(prename, surname); + res.prename = renormalize(prename); + res.prefix = renormalize(parseSurname(surname).first); + res.surname = renormalize(parseSurname(surname).second); + return res; } @@ -108,20 +227,93 @@ docstring constructName(docstring const & name, string const scheme) { // re-constructs a name from name parts according // to a given scheme - docstring const prename = nameParts(name).first; - docstring const surname = nameParts(name).second; - docstring result = from_ascii(scheme); + docstring const prename = nameParts(name).prename; + docstring const surname = nameParts(name).surname; + docstring const prefix = nameParts(name).prefix; + docstring const suffix = nameParts(name).suffix; + string res = scheme; + static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); + static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); + static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); + smatch sub; + if (regex_match(scheme, sub, reg1)) { + res = sub.str(1); + if (!prename.empty()) + res += sub.str(3); + res += sub.str(5); + } + if (regex_match(res, sub, reg2)) { + res = sub.str(1); + if (!suffix.empty()) + res += sub.str(3); + res += sub.str(5); + } + if (regex_match(res, sub, reg3)) { + res = sub.str(1); + if (!prefix.empty()) + res += sub.str(3); + res += sub.str(5); + } + docstring result = from_ascii(res); result = subst(result, from_ascii("%prename%"), prename); result = subst(result, from_ascii("%surname%"), surname); + result = subst(result, from_ascii("%prefix%"), prefix); + result = subst(result, from_ascii("%suffix%"), suffix); return result; } +vector const getAuthors(docstring const & author) +{ + // We check for goupings (via {...}) and only consider " and " + // outside groups as author separator. This is to account + // for cases such as {{Barnes and Noble, Inc.}}, which + // need to be treated as one single family name. + // We use temporary placeholders in order to differentiate the + // diverse " and " cases. + + // First, we temporarily replace all ampersands. It is rather unusual + // in author names, but can happen (consider cases such as "C \& A Corp."). + docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!")); + // Then, we temporarily make all " and " strings to ampersands in order + // to handle them later on a per-char level. + iname = subst(iname, from_ascii(" and "), from_ascii(" & ")); + // Now we traverse through the string and replace the "&" by the proper + // output in- and outside groups + docstring name; + int gl = 0; + docstring::const_iterator p = iname.begin(); + while (p != iname.end()) { + // count grouping level + if (*p == '{') + ++gl; + else if (*p == '}') + --gl; + // generate string with probable placeholders + if (*p == '&') { + if (gl > 0) + // Inside groups, we output "and" + name += from_ascii("and"); + else + // Outside groups, we output a separator + name += from_ascii("$$namesep!"); + } + else + name += *p; + ++p; + } + + // re-insert the literal ampersands + name = subst(name, from_ascii("$$amp!"), from_ascii("&")); + + // Now construct the actual vector + return getVectorFromString(name, from_ascii(" $$namesep! ")); +} + + bool multipleAuthors(docstring const author) { - vector const authors = - getVectorFromString(author, from_ascii(" and ")); - return authors.size() > 1; + return getAuthors(author).size() > 1; } @@ -186,7 +378,18 @@ docstring convertLaTeXCommands(docstring const & str) continue; } - // we just ignore braces + // Change text mode accents in the form + // {\v a} to \v{a} (see #9340). + // FIXME: This is a sort of mini-tex2lyx. + // Use the real tex2lyx instead! + static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}"); + if (lyx::regex_search(to_utf8(val), tma_reg)) { + val = val.substr(1); + val.replace(2, 1, from_ascii("{")); + continue; + } + + // Apart from the above, we just ignore braces if (ch == '{' || ch == '}') { val = val.substr(1); continue; @@ -288,6 +491,7 @@ BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type) {} + docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf, bool full, bool forceshort) const { @@ -299,9 +503,9 @@ docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf, } -docstring const BibTeXInfo::getAuthorList(Buffer const * buf, docstring author, - bool full, bool forceshort, bool allnames, - bool beginning) const +docstring const BibTeXInfo::getAuthorList(Buffer const * buf, + docstring const & author, bool const full, bool const forceshort, + bool const allnames, bool const beginning) const { // Maxnames treshold depend on engine size_t maxnames = buf ? @@ -324,13 +528,9 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, docstring author, if (author.empty()) return author; - // FIXME Move this to a separate routine that can - // be called from elsewhere. - // // OK, we've got some names. Let's format them. - // Try to split the author list on " and " - vector const authors = - getVectorFromString(author, from_ascii(" and ")); + // Try to split the author list + vector const authors = getAuthors(author); docstring retval; @@ -352,18 +552,20 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, docstring author, : " and "; string firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform") - : "%surname%, %prename%"; + : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}"; if (!beginning) firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform") - : "%prename% %surname%"; + : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}"; string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform") - : "%surname%, %prename%"; + : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}"; if (!beginning) othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform") - : "%prename% %surname%"; + : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}"; + string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform") + : "{%prefix%[[%prefix% ]]}%surname%"; // Shorten the list (with et al.) if forceshort is set - // and the list can actually be shorten, else if maxcitenames + // and the list can actually be shortened, else if maxcitenames // is passed and full is not set. bool shorten = forceshort && authors.size() > 1; vector::const_iterator it = authors.begin(); @@ -388,13 +590,13 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, docstring author, retval += (i == 0) ? constructName(*it, firstnameform) : constructName(*it, othernameform); else - retval += nameParts(*it).second; + retval += constructName(*it, citenameform); } if (shorten) { if (allnames) retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal)); else - retval = nameParts(authors[0]).second + (buf ? buf->B_(etal) : from_ascii(etal)); + retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal)); } return convertLaTeXCommands(retval); @@ -416,7 +618,9 @@ docstring const BibTeXInfo::getYear() const static regex const ereg(".*/[-]?([\\d]{4}).*"); smatch sm; string const date = to_utf8(year); - regex_match(date, sm, yreg); + if (!regex_match(date, sm, yreg)) + // cannot parse year. + return docstring(); year = from_ascii(sm[1]); // check for an endyear if (regex_match(date, sm, ereg)) @@ -812,6 +1016,8 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf, ret = from_ascii("x"); // any non-empty string will do else if (key == "ifstar" && ci.Starred) ret = from_ascii("x"); // any non-empty string will do + else if (key == "ifqualified" && ci.isQualified) + ret = from_ascii("x"); // any non-empty string will do else if (key == "entrytype") ret = entry_type_; else if (prefixIs(key, "ifentrytype:") @@ -908,6 +1114,10 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf, ret = ci.textBefore; else if (key == "textafter") ret = ci.textAfter; + else if (key == "curpretext") + ret = ci.getPretexts()[bib_key_]; + else if (key == "curposttext") + ret = ci.getPosttexts()[bib_key_]; else if (key == "year") ret = getYear(); } @@ -1157,21 +1367,21 @@ bool BiblioInfo::isBibtex(docstring const & key) const } -vector const BiblioInfo::getCiteStrings( +BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings( vector const & keys, vector const & styles, Buffer const & buf, CiteItem const & ci) const { if (empty()) - return vector(); + return vector>(); string style; - vector vec(styles.size()); - for (size_t i = 0; i != vec.size(); ++i) { + CiteStringMap csm(styles.size()); + for (size_t i = 0; i != csm.size(); ++i) { style = styles[i].name; - vec[i] = getLabel(keys, buf, style, ci); + csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci)); } - return vec; + return csm; } @@ -1256,7 +1466,7 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf) // used to remember the last one we saw // we'll be comparing entries to see if we need to add // modifiers, like "1984a" - map::iterator last; + map::iterator last = bimap_.end(); vector::const_iterator it = cited_entries_.begin(); vector::const_iterator const en = cited_entries_.end(); @@ -1271,12 +1481,10 @@ void BiblioInfo::makeCitationLabels(Buffer const & buf) docstring const num = convert(++keynumber); entry.setCiteNumber(num); } else { - // coverity complains about our derefercing the iterator last, - // which was not initialized above. but it does get initialized - // after the first time through the loop, which is the point of - // the first test. - // coverity[FORWARD_NULL] - if (it != cited_entries_.begin() + // The first test here is checking whether this is the first + // time through the loop. If so, then we do not have anything + // with which to compare. + if (last != bimap_.end() && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList() // we access the year via getYear() so as to get it from the xref, // if we need to do so