+namespace {
+
+// Remove placeholders from names
+docstring renormalize(docstring const & input)
+{
+ docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
+ return subst(res, from_ascii("$$comma!"), from_ascii(","));
+}
+
+
+// Split the surname into prefix ("von-part") and family name
+pair<docstring, docstring> parseSurname(docstring const & sname)
+{
+ // Split the surname into its tokens
+ vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
+ if (pieces.size() < 2)
+ return make_pair(docstring(), sname);
+
+ // Now we look for pieces that begin with a lower case letter.
+ // All except for the very last token constitute the "von-part".
+ docstring prefix;
+ vector<docstring>::const_iterator it = pieces.begin();
+ vector<docstring>::const_iterator const en = pieces.end();
+ bool first = true;
+ for (; it != en; ++it) {
+ if ((*it).empty())
+ continue;
+ // If this is the last piece, then what we now have is
+ // the family name, notwithstanding the casing.
+ if (it + 1 == en)
+ break;
+ char_type const c = (*it)[0];
+ // If the piece starts with a upper case char, we assume
+ // this is part of the surname.
+ if (!isLower(c))
+ break;
+ // Nothing of the former, so add this piece to the prename
+ if (!first)
+ prefix += " ";
+ else
+ first = false;
+ prefix += *it;
+ }
+
+ // Reconstruct the family name.
+ // Note that if we left the loop with because it + 1 == en,
+ // then this will still do the right thing, i.e., make surname
+ // just be the last piece.
+ docstring surname;
+ first = true;
+ for (; it != en; ++it) {
+ if (!first)
+ surname += " ";
+ else
+ first = false;
+ surname += *it;
+ }
+ return make_pair(prefix, surname);
+}
+
+
+struct name_parts {
+ docstring surname;
+ docstring prename;
+ docstring suffix;
+ docstring prefix;
+};
+
+
+// gets the name parts (prename, surname, prefix, suffix) from an author-type string
+name_parts nameParts(docstring const & iname)
+{
+ name_parts res;
+ if (iname.empty())
+ return res;
+
+ // First we check for goupings (via {...}) and replace blanks and
+ // commas inside groups with temporary placeholders
+ docstring name;
+ int gl = 0;
+ docstring::const_iterator p = iname.begin();
+ while (p != iname.end()) {
+ // count grouping level
+ if (*p == '{')
+ ++gl;
+ else if (*p == '}')
+ --gl;
+ // generate string with probable placeholders
+ if (*p == ' ' && gl > 0)
+ name += from_ascii("$$space!");
+ else if (*p == ',' && gl > 0)
+ name += from_ascii("$$comma!");
+ else
+ name += *p;
+ ++p;
+ }
+
+ // Now we look for a comma, and take the last name to be everything
+ // preceding the right-most one, so that we also get the name suffix
+ // (aka "jr" part).
+ vector<docstring> pieces = getVectorFromString(name);
+ if (pieces.size() > 1) {
+ // Whether we have a name suffix or not, the prename is
+ // always last item
+ res.prename = renormalize(pieces.back());
+ // The family name, conversely, is always the first item.
+ // However, it might contain a prefix (aka "von" part)
+ docstring const sname = pieces.front();
+ res.prefix = renormalize(parseSurname(sname).first);
+ res.surname = renormalize(parseSurname(sname).second);
+ // If we have three pieces (the maximum allowed by BibTeX),
+ // the second one is the name suffix.
+ if (pieces.size() > 2)
+ res.suffix = renormalize(pieces.at(1));
+ return res;
+ }
+
+ // OK, so now we want to look for the last name.
+ // Split on spaces, to get various tokens.
+ pieces = getVectorFromString(name, from_ascii(" "));
+ // No space: Only a family name given
+ if (pieces.size() < 2) {
+ res.surname = renormalize(pieces.back());
+ return res;
+ }
+ // If we get two pieces, assume "prename surname"
+ if (pieces.size() == 2) {
+ res.prename = renormalize(pieces.front());
+ res.surname = renormalize(pieces.back());
+ return res;
+ }
+
+ // More than 3 pieces: A name prefix (aka "von" part) might be included.
+ // We look for the first piece that begins with a lower case letter
+ // (which is the name prefix, if it is not the last token) or the last token.
+ docstring prename;
+ vector<docstring>::const_iterator it = pieces.begin();
+ vector<docstring>::const_iterator const en = pieces.end();
+ bool first = true;
+ for (; it != en; ++it) {
+ if ((*it).empty())
+ continue;
+ char_type const c = (*it)[0];
+ // If the piece starts with a lower case char, we assume
+ // this is the name prefix and thus prename is complete.
+ if (isLower(c))
+ break;
+ // Same if this is the last piece, which is always the surname.
+ if (it + 1 == en)
+ break;
+ // Nothing of the former, so add this piece to the prename
+ if (!first)
+ prename += " ";
+ else
+ first = false;
+ prename += *it;
+ }
+
+ // Now reconstruct the family name and strip the prefix.
+ // Note that if we left the loop because it + 1 == en,
+ // then this will still do the right thing, i.e., make surname
+ // just be the last piece.
+ docstring surname;
+ first = true;
+ for (; it != en; ++it) {
+ if (!first)
+ surname += " ";
+ else
+ first = false;
+ surname += *it;
+ }
+ res.prename = renormalize(prename);
+ res.prefix = renormalize(parseSurname(surname).first);
+ res.surname = renormalize(parseSurname(surname).second);
+ return res;
+}
+
+
+docstring constructName(docstring const & name, string const scheme)
+{
+ // re-constructs a name from name parts according
+ // to a given scheme
+ docstring const prename = nameParts(name).prename;
+ docstring const surname = nameParts(name).surname;
+ docstring const prefix = nameParts(name).prefix;
+ docstring const suffix = nameParts(name).suffix;
+ string res = scheme;
+ static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
+ static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
+ static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
+ smatch sub;
+ if (regex_match(scheme, sub, reg1)) {
+ res = sub.str(1);
+ if (!prename.empty())
+ res += sub.str(3);
+ res += sub.str(5);
+ }
+ if (regex_match(res, sub, reg2)) {
+ res = sub.str(1);
+ if (!suffix.empty())
+ res += sub.str(3);
+ res += sub.str(5);
+ }
+ if (regex_match(res, sub, reg3)) {
+ res = sub.str(1);
+ if (!prefix.empty())
+ res += sub.str(3);
+ res += sub.str(5);
+ }
+ docstring result = from_ascii(res);
+ result = subst(result, from_ascii("%prename%"), prename);
+ result = subst(result, from_ascii("%surname%"), surname);
+ result = subst(result, from_ascii("%prefix%"), prefix);
+ result = subst(result, from_ascii("%suffix%"), suffix);
+ return result;
+}
+
+
+vector<docstring> const getAuthors(docstring const & author)
+{
+ // We check for goupings (via {...}) and only consider " and "
+ // outside groups as author separator. This is to account
+ // for cases such as {{Barnes and Noble, Inc.}}, which
+ // need to be treated as one single family name.
+ // We use temporary placeholders in order to differentiate the
+ // diverse " and " cases.
+
+ // First, we temporarily replace all ampersands. It is rather unusual
+ // in author names, but can happen (consider cases such as "C \& A Corp.").
+ docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
+ // Then, we temporarily make all " and " strings to ampersands in order
+ // to handle them later on a per-char level.
+ iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
+ // Now we traverse through the string and replace the "&" by the proper
+ // output in- and outside groups
+ docstring name;
+ int gl = 0;
+ docstring::const_iterator p = iname.begin();
+ while (p != iname.end()) {
+ // count grouping level
+ if (*p == '{')
+ ++gl;
+ else if (*p == '}')
+ --gl;
+ // generate string with probable placeholders
+ if (*p == '&') {
+ if (gl > 0)
+ // Inside groups, we output "and"
+ name += from_ascii("and");
+ else
+ // Outside groups, we output a separator
+ name += from_ascii("$$namesep!");
+ }
+ else
+ name += *p;
+ ++p;
+ }
+
+ // re-insert the literal ampersands
+ name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
+
+ // Now construct the actual vector
+ return getVectorFromString(name, from_ascii(" $$namesep! "));
+}
+
+
+bool multipleAuthors(docstring const author)
+{
+ return getAuthors(author).size() > 1;
+}
+
+
+// converts a string containing LaTeX commands into unicode
+// for display.
+docstring convertLaTeXCommands(docstring const & str)
+{
+ docstring val = str;
+ docstring ret;
+
+ bool scanning_cmd = false;
+ bool scanning_math = false;
+ bool escaped = false; // used to catch \$, etc.
+ while (!val.empty()) {
+ char_type const ch = val[0];
+
+ // if we're scanning math, we output everything until we
+ // find an unescaped $, at which point we break out.
+ if (scanning_math) {
+ if (escaped)
+ escaped = false;
+ else if (ch == '\\')
+ escaped = true;
+ else if (ch == '$')
+ scanning_math = false;
+ ret += ch;
+ val = val.substr(1);
+ continue;
+ }
+
+ // if we're scanning a command name, then we just
+ // discard characters until we hit something that
+ // isn't alpha.
+ if (scanning_cmd) {
+ if (isAlphaASCII(ch)) {
+ val = val.substr(1);
+ escaped = false;
+ continue;
+ }
+ // so we're done with this command.
+ // now we fall through and check this character.
+ scanning_cmd = false;
+ }
+
+ // was the last character a \? If so, then this is something like:
+ // \\ or \$, so we'll just output it. That's probably not always right...
+ if (escaped) {
+ // exception: output \, as THIN SPACE
+ if (ch == ',')
+ ret.push_back(0x2009);
+ else
+ ret += ch;
+ val = val.substr(1);
+ escaped = false;
+ continue;
+ }
+
+ if (ch == '$') {
+ ret += ch;
+ val = val.substr(1);
+ scanning_math = true;
+ continue;
+ }
+
+ // Change text mode accents in the form
+ // {\v a} to \v{a} (see #9340).
+ // FIXME: This is a sort of mini-tex2lyx.
+ // Use the real tex2lyx instead!
+ static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
+ if (lyx::regex_search(to_utf8(val), tma_reg)) {
+ val = val.substr(1);
+ val.replace(2, 1, from_ascii("{"));
+ continue;
+ }
+
+ // Apart from the above, we just ignore braces
+ if (ch == '{' || ch == '}') {
+ val = val.substr(1);
+ continue;
+ }
+
+ // we're going to check things that look like commands, so if
+ // this doesn't, just output it.
+ if (ch != '\\') {
+ ret += ch;
+ val = val.substr(1);
+ continue;
+ }
+
+ // ok, could be a command of some sort
+ // let's see if it corresponds to some unicode
+ // unicodesymbols has things in the form: \"{u},
+ // whereas we may see things like: \"u. So we'll
+ // look for that and change it, if necessary.
+ // FIXME: This is a sort of mini-tex2lyx.
+ // Use the real tex2lyx instead!
+ static lyx::regex const reg("^\\\\\\W\\w");
+ if (lyx::regex_search(to_utf8(val), reg)) {
+ val.insert(3, from_ascii("}"));
+ val.insert(2, from_ascii("{"));
+ }
+ bool termination;
+ docstring rem;
+ docstring const cnvtd = Encodings::fromLaTeXCommand(val,
+ Encodings::TEXT_CMD, termination, rem);
+ if (!cnvtd.empty()) {
+ // it did, so we'll take that bit and proceed with what's left
+ ret += cnvtd;
+ val = rem;
+ continue;
+ }
+ // it's a command of some sort
+ scanning_cmd = true;
+ escaped = true;
+ val = val.substr(1);
+ }
+ return ret;
+}
+
+
+// Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
+docstring processRichtext(docstring const & str, bool richtext)
+{
+ docstring val = str;
+ docstring ret;
+
+ bool scanning_rich = false;
+ while (!val.empty()) {
+ char_type const ch = val[0];
+ if (ch == '{' && val.size() > 1 && val[1] == '!') {
+ // beginning of rich text
+ scanning_rich = true;
+ val = val.substr(2);
+ continue;
+ }
+ if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
+ // end of rich text
+ scanning_rich = false;
+ val = val.substr(2);
+ continue;
+ }
+ if (richtext) {
+ if (scanning_rich)
+ ret += ch;
+ else {
+ // we need to escape '<' and '>'
+ if (ch == '<')
+ ret += "<";
+ else if (ch == '>')
+ ret += ">";
+ else
+ ret += ch;
+ }
+ } else if (!scanning_rich /* && !richtext */)
+ ret += ch;
+ // else the character is discarded, which will happen only if
+ // richtext == false and we are scanning rich text
+ val = val.substr(1);
+ }
+ return ret;
+}
+
+} // anon namespace
+