From bb24f021887e6d9e1b605070e005163b7422215f Mon Sep 17 00:00:00 2001 From: Richard Heck Date: Fri, 16 Jan 2009 23:42:16 +0000 Subject: [PATCH] We'll do the unicode conversion where it's actually needed: where we calculate what to display. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@28191 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/BiblioInfo.cpp | 105 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp index 7251649f5a..1e32445891 100644 --- a/src/BiblioInfo.cpp +++ b/src/BiblioInfo.cpp @@ -16,6 +16,7 @@ #include "Buffer.h" #include "BufferParams.h" #include "buffer_funcs.h" +#include "Encoding.h" #include "InsetIterator.h" #include "Paragraph.h" @@ -45,7 +46,7 @@ namespace lyx { ////////////////////////////////////////////////////////////////////// BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type) - : is_bibtex_(true), bib_key_(key), entry_type_(type) + : is_bibtex_(true), bib_key_(key), entry_type_(type), info_() {} @@ -173,6 +174,106 @@ docstring const BibTeXInfo::getYear() const } +namespace { + + docstring convertLaTeXCommands(docstring const & str) + { + docstring val = str; + docstring ret; + + bool scanning_cmd = false; + bool scanning_math = false; + bool escaped = false; // used to catch \$, etc. + while (val.size()) { + char_type const ch = val[0]; + + // if we're scanning math, we output everything until we + // find an unescaped $, at which point we break out. + if (scanning_math) { + if (escaped) + escaped = false; + else if (ch == '\\') + escaped = true; + else if (ch == '$') + scanning_math = false; + ret += ch; + val = val.substr(1); + continue; + } + + // if we're scanning a command name, then we just + // discard characters until we hit something that + // isn't alpha. + if (scanning_cmd) { + if (isAlphaASCII(ch)) { + val = val.substr(1); + escaped = false; + continue; + } + // so we're done with this command. + // now we fall through and check this character. + scanning_cmd = false; + } + + // was the last character a \? If so, then this is something like: \\, + // or \$, so we'll just output it. That's probably not always right... + if (escaped) { + ret += ch; + val = val.substr(1); + escaped = false; + continue; + } + + if (ch == '$') { + ret += ch; + val = val.substr(1); + scanning_math = true; + continue; + } + + // we just ignore braces + if (ch == '{' || ch == '}') { + val = val.substr(1); + continue; + } + + // we're going to check things that look like commands, so if + // this doesn't, just output it. + if (ch != '\\') { + ret += ch; + val = val.substr(1); + continue; + } + + // ok, could be a command of some sort + // let's see if it corresponds to some unicode + // unicodesymbols has things in the form: \"{u}, + // whereas we may see things like: \"u. So we'll + // look for that and change it, if necessary. + static boost::regex const reg("^\\\\\\W\\w"); + if (boost::regex_search(to_utf8(val), reg)) { + val.insert(3, from_ascii("}")); + val.insert(2, from_ascii("{")); + } + docstring rem; + docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem); + if (!cnvtd.empty()) { + // it did, so we'll take that bit and proceed with what's left + ret += cnvtd; + val = rem; + continue; + } + // it's a command of some sort + scanning_cmd = true; + escaped = true; + val = val.substr(1); + } + return ret; + } + +} // anon namespace + + docstring const BibTeXInfo::getInfo() const { if (!info_.empty()) @@ -229,7 +330,7 @@ docstring const BibTeXInfo::getInfo() const docstring const result_str = rtrim(result.str()); if (!result_str.empty()) { - info_ = result_str; + info_ = convertLaTeXCommands(result_str); return info_; } -- 2.39.2