X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FBiblioInfo.cpp;h=a9f48e44577ed28eea1a66389b0699ad0b7c63d2;hb=28be7d552f62cc02fa86d7f79201d089bfb2d7b5;hp=67a35272cde21ea6fb9bc2a4fc80eb48e06c20be;hpb=ee26e7fadf0b26b767edee709369c46514680bd4;p=lyx.git diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp index 67a35272cd..a9f48e4457 100644 --- a/src/BiblioInfo.cpp +++ b/src/BiblioInfo.cpp @@ -55,11 +55,71 @@ docstring renormalize(docstring const & input) } -// gets the "prename" and "family name" from an author-type string -pair nameParts(docstring const & iname) +// Split the surname into prefix ("von-part") and family name +pair parseSurname(docstring const & sname) { + // Split the surname into its tokens + vector pieces = getVectorFromString(sname, from_ascii(" ")); + if (pieces.size() < 2) + return make_pair(docstring(), sname); + + // Now we look for pieces that begin with a lower case letter. + // All except for the very last token constitute the "von-part". + docstring prefix; + vector::const_iterator it = pieces.begin(); + vector::const_iterator const en = pieces.end(); + bool first = true; + for (; it != en; ++it) { + if ((*it).empty()) + continue; + // If this is the last piece, then what we now have is + // the family name, notwithstanding the casing. + if (it + 1 == en) + break; + char_type const c = (*it)[0]; + // If the piece starts with a upper case char, we assume + // this is part of the surname. + if (!isLower(c)) + break; + // Nothing of the former, so add this piece to the prename + if (!first) + prefix += " "; + else + first = false; + prefix += *it; + } + + // Reconstruct the family name. + // Note that if we left the loop with because it + 1 == en, + // then this will still do the right thing, i.e., make surname + // just be the last piece. + docstring surname; + first = true; + for (; it != en; ++it) { + if (!first) + surname += " "; + else + first = false; + surname += *it; + } + return make_pair(prefix, surname); +} + + +struct name_parts { + docstring surname; + docstring prename; + docstring suffix; + docstring prefix; +}; + + +// gets the name parts (prename, surname, prefix, suffix) from an author-type string +name_parts nameParts(docstring const & iname) +{ + name_parts res; if (iname.empty()) - return make_pair(docstring(), docstring()); + return res; // First we check for goupings (via {...}) and replace blanks and // commas inside groups with temporary placeholders @@ -83,26 +143,43 @@ pair nameParts(docstring const & iname) } // Now we look for a comma, and take the last name to be everything - // preceding the right-most one, so that we also get the "jr" part. + // preceding the right-most one, so that we also get the name suffix + // (aka "jr" part). vector pieces = getVectorFromString(name); - if (pieces.size() > 1) - // whether we have a jr. part or not, it's always - // the first and last item (reversed) - return make_pair(renormalize(pieces.back()), renormalize(pieces.front())); + if (pieces.size() > 1) { + // Whether we have a name suffix or not, the prename is + // always last item + res.prename = renormalize(pieces.back()); + // The family name, conversely, is always the first item. + // However, it might contain a prefix (aka "von" part) + docstring const sname = pieces.front(); + res.prefix = renormalize(parseSurname(sname).first); + res.surname = renormalize(parseSurname(sname).second); + // If we have three pieces (the maximum allowed by BibTeX), + // the second one is the name suffix. + if (pieces.size() > 2) + res.suffix = renormalize(pieces.at(1)); + return res; + } - // OK, so now we want to look for the last name. We're going to - // include the "von" part. This isn't perfect. + // OK, so now we want to look for the last name. // Split on spaces, to get various tokens. pieces = getVectorFromString(name, from_ascii(" ")); // No space: Only a family name given - if (pieces.size() < 2) - return make_pair(from_ascii(""), renormalize(pieces.back())); - // If we get two pieces, assume the last one is the last name - if (pieces.size() == 2) - return make_pair(renormalize(pieces.front()), renormalize(pieces.back())); + if (pieces.size() < 2) { + res.surname = renormalize(pieces.back()); + return res; + } + // If we get two pieces, assume "prename surname" + if (pieces.size() == 2) { + res.prename = renormalize(pieces.front()); + res.surname = renormalize(pieces.back()); + return res; + } - // More than 3 pieces: Now we look for the first piece that - // begins with a lower case letter (the "von-part"). + // More than 3 pieces: A name prefix (aka "von" part) might be included. + // We look for the first piece that begins with a lower case letter + // (which is the name prefix, if it is not the last token) or the last token. docstring prename; vector::const_iterator it = pieces.begin(); vector::const_iterator const en = pieces.end(); @@ -112,12 +189,10 @@ pair nameParts(docstring const & iname) continue; char_type const c = (*it)[0]; // If the piece starts with a lower case char, we assume - // this is the "von-part" (family name prefix) and thus part - // of the family name. + // this is the name prefix and thus prename is complete. if (isLower(c)) break; - // If this is the last piece, then what we now have is - // the family name. + // Same if this is the last piece, which is always the surname. if (it + 1 == en) break; // Nothing of the former, so add this piece to the prename @@ -128,8 +203,8 @@ pair nameParts(docstring const & iname) prename += *it; } - // Reconstruct the family name. - // Note that if we left the loop with because it + 1 == en, + // Now reconstruct the family name and strip the prefix. + // Note that if we left the loop because it + 1 == en, // then this will still do the right thing, i.e., make surname // just be the last piece. docstring surname; @@ -141,7 +216,10 @@ pair nameParts(docstring const & iname) first = false; surname += *it; } - return make_pair(renormalize(prename), renormalize(surname)); + res.prename = renormalize(prename); + res.prefix = renormalize(parseSurname(surname).first); + res.surname = renormalize(parseSurname(surname).second); + return res; } @@ -149,20 +227,93 @@ docstring constructName(docstring const & name, string const scheme) { // re-constructs a name from name parts according // to a given scheme - docstring const prename = nameParts(name).first; - docstring const surname = nameParts(name).second; - docstring result = from_ascii(scheme); + docstring const prename = nameParts(name).prename; + docstring const surname = nameParts(name).surname; + docstring const prefix = nameParts(name).prefix; + docstring const suffix = nameParts(name).suffix; + string res = scheme; + static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); + static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); + static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)"); + smatch sub; + if (regex_match(scheme, sub, reg1)) { + res = sub.str(1); + if (!prename.empty()) + res += sub.str(3); + res += sub.str(5); + } + if (regex_match(res, sub, reg2)) { + res = sub.str(1); + if (!suffix.empty()) + res += sub.str(3); + res += sub.str(5); + } + if (regex_match(res, sub, reg3)) { + res = sub.str(1); + if (!prefix.empty()) + res += sub.str(3); + res += sub.str(5); + } + docstring result = from_ascii(res); result = subst(result, from_ascii("%prename%"), prename); result = subst(result, from_ascii("%surname%"), surname); + result = subst(result, from_ascii("%prefix%"), prefix); + result = subst(result, from_ascii("%suffix%"), suffix); return result; } +vector const getAuthors(docstring const & author) +{ + // We check for goupings (via {...}) and only consider " and " + // outside groups as author separator. This is to account + // for cases such as {{Barnes and Noble, Inc.}}, which + // need to be treated as one single family name. + // We use temporary placeholders in order to differentiate the + // diverse " and " cases. + + // First, we temporarily replace all ampersands. It is rather unusual + // in author names, but can happen (consider cases such as "C \& A Corp."). + docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!")); + // Then, we temporarily make all " and " strings to ampersands in order + // to handle them later on a per-char level. + iname = subst(iname, from_ascii(" and "), from_ascii(" & ")); + // Now we traverse through the string and replace the "&" by the proper + // output in- and outside groups + docstring name; + int gl = 0; + docstring::const_iterator p = iname.begin(); + while (p != iname.end()) { + // count grouping level + if (*p == '{') + ++gl; + else if (*p == '}') + --gl; + // generate string with probable placeholders + if (*p == '&') { + if (gl > 0) + // Inside groups, we output "and" + name += from_ascii("and"); + else + // Outside groups, we output a separator + name += from_ascii("$$namesep!"); + } + else + name += *p; + ++p; + } + + // re-insert the literal ampersands + name = subst(name, from_ascii("$$amp!"), from_ascii("&")); + + // Now construct the actual vector + return getVectorFromString(name, from_ascii(" $$namesep! ")); +} + + bool multipleAuthors(docstring const author) { - vector const authors = - getVectorFromString(author, from_ascii(" and ")); - return authors.size() > 1; + return getAuthors(author).size() > 1; } @@ -227,7 +378,18 @@ docstring convertLaTeXCommands(docstring const & str) continue; } - // we just ignore braces + // Change text mode accents in the form + // {\v a} to \v{a} (see #9340). + // FIXME: This is a sort of mini-tex2lyx. + // Use the real tex2lyx instead! + static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}"); + if (lyx::regex_search(to_utf8(val), tma_reg)) { + val = val.substr(1); + val.replace(2, 1, from_ascii("{")); + continue; + } + + // Apart from the above, we just ignore braces if (ch == '{' || ch == '}') { val = val.substr(1); continue; @@ -366,13 +528,9 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, if (author.empty()) return author; - // FIXME Move this to a separate routine that can - // be called from elsewhere. - // // OK, we've got some names. Let's format them. - // Try to split the author list on " and " - vector const authors = - getVectorFromString(author, from_ascii(" and ")); + // Try to split the author list + vector const authors = getAuthors(author); docstring retval; @@ -394,15 +552,17 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, : " and "; string firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform") - : "%surname%, %prename%"; + : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}"; if (!beginning) firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform") - : "%prename% %surname%"; + : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}"; string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform") - : "%surname%, %prename%"; + : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}"; if (!beginning) othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform") - : "%prename% %surname%"; + : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}"; + string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform") + : "{%prefix%[[%prefix% ]]}%surname%"; // Shorten the list (with et al.) if forceshort is set // and the list can actually be shortened, else if maxcitenames @@ -430,13 +590,13 @@ docstring const BibTeXInfo::getAuthorList(Buffer const * buf, retval += (i == 0) ? constructName(*it, firstnameform) : constructName(*it, othernameform); else - retval += nameParts(*it).second; + retval += constructName(*it, citenameform); } if (shorten) { if (allnames) retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal)); else - retval = nameParts(authors[0]).second + (buf ? buf->B_(etal) : from_ascii(etal)); + retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal)); } return convertLaTeXCommands(retval); @@ -1207,21 +1367,21 @@ bool BiblioInfo::isBibtex(docstring const & key) const } -vector const BiblioInfo::getCiteStrings( +BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings( vector const & keys, vector const & styles, Buffer const & buf, CiteItem const & ci) const { if (empty()) - return vector(); + return vector>(); string style; - vector vec(styles.size()); - for (size_t i = 0; i != vec.size(); ++i) { + CiteStringMap csm(styles.size()); + for (size_t i = 0; i != csm.size(); ++i) { style = styles[i].name; - vec[i] = getLabel(keys, buf, style, ci); + csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci)); } - return vec; + return csm; }