+namespace {
+
+// Extracts the "family name" from a single author-type string.
+//
+// Heuristic (this is not a full BibTeX name parser):
+//  - An empty name yields an empty string.
+//  - If name contains a comma, everything preceding the right-most comma
+//    is returned after being passed through ltrim(), so that a "jr"
+//    part is kept as well.
+//  - Otherwise name is split on spaces. With at most two tokens, the last
+//    token is returned. With more, the result is every token from the
+//    first one that starts with a lower-case letter (taken to be the
+//    "von" part) to the end, joined by single spaces. If no token starts
+//    with a lower-case letter, the last token is returned.
+docstring familyName(docstring const & name)
+{
+	if (name.empty())
+		return docstring();
+
+	// first we look for a comma, and take the last name to be everything
+	// preceding the right-most one, so that we also get the "jr" part.
+	docstring::size_type idx = name.rfind(',');
+	if (idx != docstring::npos)
+		return ltrim(name.substr(0, idx));
+
+	// OK, so now we want to look for the last name. We're going to
+	// include the "von" part. This isn't perfect.
+	// Split on spaces, to get various tokens.
+	vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
+
+	// back() on an empty vector is undefined behaviour, and nothing here
+	// guarantees that splitting (e.g. a whitespace-only name) produces at
+	// least one token, so bail out explicitly.
+	if (pieces.empty())
+		return docstring();
+
+	// If we get at most two tokens, assume the last one is the last name
+	if (pieces.size() <= 2)
+		return pieces.back();
+
+	// Now we look for the first token that begins with a lower case
+	// letter. The search starts at the very first token, so a lower-case
+	// first token is treated as the start of the family name too.
+	vector<docstring>::const_iterator it = pieces.begin();
+	vector<docstring>::const_iterator en = pieces.end();
+	for (; it != en; ++it) {
+		// empty tokens have no first character to inspect
+		if ((*it).empty())
+			continue;
+		char_type const c = (*it)[0];
+		if (isLower(c))
+			break;
+	}
+
+	if (it == en) // we never found a "von"
+		return pieces.back();
+
+	// reconstruct what we need to return: the "von" token and everything
+	// after it, separated by single spaces
+	docstring retval;
+	bool first = true;
+	for (; it != en; ++it) {
+		if (!first)
+			retval += " ";
+		else
+			first = false;
+		retval += *it;
+	}
+	return retval;
+}
+
+
+// Converts a string containing LaTeX commands into unicode for display.
+//
+// The input is consumed one character at a time from the front of a
+// working copy (val) and the result is accumulated in ret:
+//  - math ($...$) is copied through verbatim, delimiters included;
+//  - braces outside math are dropped;
+//  - a backslash sequence for which Encodings::fromLaTeXCommand() returns
+//    a non-empty result is replaced by that result;
+//  - any other command has its backslash and its ASCII-alphabetic name
+//    discarded, while an escaped non-alphabetic character (\$, \\, ...)
+//    is output literally, except that \, becomes U+2009 THIN SPACE.
+// Returns the converted string.
+docstring convertLaTeXCommands(docstring const & str)
+{
+ docstring val = str;
+ docstring ret;
+
+ // State flags. The order in which they are tested below matters:
+ // math first, then command name, then escaped character.
+ bool scanning_cmd = false;
+ bool scanning_math = false;
+ bool escaped = false; // used to catch \$, etc.
+ // NOTE(review): every step re-assigns val = val.substr(1), which copies
+ // the remaining text each time; cost grows quadratically with length.
+ while (!val.empty()) {
+ char_type const ch = val[0];
+
+ // if we're scanning math, we output everything until we
+ // find an unescaped $, at which point we break out.
+ // The closing $ itself is still appended to ret below.
+ if (scanning_math) {
+ if (escaped)
+ escaped = false;
+ else if (ch == '\\')
+ escaped = true;
+ else if (ch == '$')
+ scanning_math = false;
+ ret += ch;
+ val = val.substr(1);
+ continue;
+ }
+
+ // if we're scanning a command name, then we just
+ // discard characters until we hit something that
+ // isn't alpha.
+ if (scanning_cmd) {
+ if (isAlphaASCII(ch)) {
+ val = val.substr(1);
+ // once a letter has been seen this is a named command, not an
+ // escaped character, so the pending escape is cancelled.
+ escaped = false;
+ continue;
+ }
+ // so we're done with this command.
+ // now we fall through and check this character.
+ scanning_cmd = false;
+ }
+
+ // was the last character a \? If so, then this is something like:
+ // \\ or \$, so we'll just output it. That's probably not always right...
+ if (escaped) {
+ // exception: output \, as THIN SPACE
+ if (ch == ',')
+ ret.push_back(0x2009);
+ else
+ ret += ch;
+ val = val.substr(1);
+ escaped = false;
+ continue;
+ }
+
+ // an unescaped $ opens math mode; the delimiter is kept in the output
+ if (ch == '$') {
+ ret += ch;
+ val = val.substr(1);
+ scanning_math = true;
+ continue;
+ }
+
+ // we just ignore braces
+ if (ch == '{' || ch == '}') {
+ val = val.substr(1);
+ continue;
+ }
+
+ // we're going to check things that look like commands, so if
+ // this doesn't, just output it.
+ if (ch != '\\') {
+ ret += ch;
+ val = val.substr(1);
+ continue;
+ }
+
+ // ok, could be a command of some sort
+ // let's see if it corresponds to some unicode
+ // unicodesymbols has things in the form: \"{u},
+ // whereas we may see things like: \"u. So we'll
+ // look for that and change it, if necessary.
+ // FIXME: This is a sort of mini-tex2lyx.
+ // Use the real tex2lyx instead!
+ // The pattern is: backslash, one non-word character, one word
+ // character, anchored at the start of the remaining text.
+ static lyx::regex const reg("^\\\\\\W\\w");
+ if (lyx::regex_search(to_utf8(val), reg)) {
+ // Wrap the character at index 2 in braces. The closing brace is
+ // inserted first so that index 2 still refers to that character
+ // when the opening brace goes in.
+ val.insert(3, from_ascii("}"));
+ val.insert(2, from_ascii("{"));
+ }
+ // termination and rem are handed to fromLaTeXCommand(); presumably both
+ // are output parameters -- confirm against its declaration. Only rem
+ // is read afterwards.
+ bool termination;
+ docstring rem;
+ docstring const cnvtd = Encodings::fromLaTeXCommand(val,
+ Encodings::TEXT_CMD, termination, rem);
+ if (!cnvtd.empty()) {
+ // it did, so we'll take that bit and proceed with what's left
+ ret += cnvtd;
+ val = rem;
+ continue;
+ }
+ // it's a command of some sort
+ // Drop the backslash and let the next iterations decide: letters are
+ // swallowed as a command name (scanning_cmd), anything else is
+ // emitted as an escaped character (escaped). Braces inserted above
+ // remain in val and are removed later by the brace rule.
+ scanning_cmd = true;
+ escaped = true;
+ val = val.substr(1);
+ }
+ return ret;
+}
+
+
+// Removes richtext markers of the form {!...!} from str and returns the
+// resulting string.
+//
+// If richtext is true, the text enclosed by the markers is copied
+// verbatim, while '<' and '>' occurring outside the markers are escaped
+// as "&lt;" and "&gt;".
+// If richtext is false, the markers and everything between them are
+// dropped and the remaining text is copied unchanged.
+// An end marker "!}" is only recognised while inside a {!...!} span.
+docstring processRichtext(docstring const & str, bool richtext)
+{
+	docstring ret;
+	docstring::size_type const len = str.size();
+
+	bool scanning_rich = false;
+	// Walk the input by index rather than repeatedly copying the tail.
+	docstring::size_type i = 0;
+	while (i < len) {
+		char_type const ch = str[i];
+		bool const has_next = i + 1 < len;
+		if (ch == '{' && has_next && str[i + 1] == '!') {
+			// beginning of rich text
+			scanning_rich = true;
+			i += 2;
+			continue;
+		}
+		if (scanning_rich && ch == '!' && has_next && str[i + 1] == '}') {
+			// end of rich text
+			scanning_rich = false;
+			i += 2;
+			continue;
+		}
+		if (richtext) {
+			if (scanning_rich)
+				ret += ch;
+			// outside the markers we need to escape '<' and '>'
+			else if (ch == '<')
+				ret += "&lt;";
+			else if (ch == '>')
+				ret += "&gt;";
+			else
+				ret += ch;
+		} else if (!scanning_rich /* && !richtext */)
+			ret += ch;
+		// else the character is discarded, which will happen only if
+		// richtext == false and we are scanning rich text
+		++i;
+	}
+	return ret;
+}
+
+} // anon namespace
+
+