From bb24f021887e6d9e1b605070e005163b7422215f Mon Sep 17 00:00:00 2001
From: Richard Heck <rgheck@comcast.net>
Date: Fri, 16 Jan 2009 23:42:16 +0000
Subject: [PATCH] We'll do the unicode conversion where it's actually needed:
 where we calculate what to display.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@28191 a592a061-630c-0410-9148-cb99ea01b6c8
---
 src/BiblioInfo.cpp | 105 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 103 insertions(+), 2 deletions(-)

diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp
index 7251649f5a..1e32445891 100644
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -16,6 +16,7 @@
 #include "Buffer.h"
 #include "BufferParams.h"
 #include "buffer_funcs.h"
+#include "Encoding.h"
 #include "InsetIterator.h"
 #include "Paragraph.h"
 
@@ -45,7 +46,7 @@ namespace lyx {
 //////////////////////////////////////////////////////////////////////
 
 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
-	: is_bibtex_(true), bib_key_(key), entry_type_(type)
+	: is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
 {}
 
 
@@ -173,6 +174,106 @@ docstring const BibTeXInfo::getYear() const
 }
 
 
+namespace {
+
+	docstring convertLaTeXCommands(docstring const & str)
+	{
+		docstring val = str; // input that has not been processed yet
+		docstring ret; // text produced so far; this is what gets returned
+		// Turns LaTeX markup in str into display text by consuming val from the front.
+		bool scanning_cmd = false; // true while skipping the letters of a command name
+		bool scanning_math = false; // true between an opening $ and its closing $
+		bool escaped = false; // true right after a backslash; used to catch \$, etc.
+		while (val.size()) {
+			char_type const ch = val[0];
+
+			// Inside $...$: copy every character verbatim, including the
+			// closing $. Only an unescaped $ ends math mode.
+			if (scanning_math) {
+				if (escaped)
+					escaped = false;
+				else if (ch == '\\')
+					escaped = true;
+				else if (ch == '$') 
+					scanning_math = false;
+				ret += ch;
+				val = val.substr(1);
+				continue;
+			}
+
+			// Inside a command name: discard ASCII letters until we hit
+			// a character that is not one. Seeing a letter also clears
+			// escaped, so whatever follows \foo is not treated as escaped.
+			if (scanning_cmd) {
+				if (isAlphaASCII(ch)) {
+					val = val.substr(1);
+					escaped = false;
+					continue;
+				}
+				// So we're done with this command.
+				// Now we fall through and check this character as usual.
+				scanning_cmd = false;
+			}
+
+			// Was the last character a \? If so, then this is something like: \\,
+			// or \$, so we'll just output it. That's probably not always right...
+			if (escaped) {
+				ret += ch;
+				val = val.substr(1);
+				escaped = false;
+				continue;
+			}
+
+			if (ch == '$') { // unescaped $: output it and enter math mode
+				ret += ch;
+				val = val.substr(1);
+				scanning_math = true;
+				continue;
+			}
+
+			// Braces are dropped from the output.
+			if (ch == '{' || ch == '}') {
+				val = val.substr(1);
+				continue;
+			}
+
+			// Only a backslash is examined as a possible command below,
+			// so any other character is copied to the output unchanged.
+			if (ch != '\\') {
+				ret += ch;
+				val = val.substr(1);
+				continue;
+			}
+
+			// val now starts with a backslash, so this may be a command.
+			// Let's see if it corresponds to some unicode.
+			// unicodesymbols has things in the form: \"{u}, whereas we
+			// may see things like: \"u. The regex matches a backslash, a
+			// non-word character and a word character; we add the braces.
+			static boost::regex const reg("^\\\\\\W\\w");
+			if (boost::regex_search(to_utf8(val), reg)) {
+				val.insert(3, from_ascii("}")); // closing brace first, so index 2 stays valid
+				val.insert(2, from_ascii("{"));
+			}
+			docstring rem;
+			docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
+			if (!cnvtd.empty()) {
+				// It did, so we'll take that bit and proceed with what's left (rem).
+				ret += cnvtd;
+				val = rem;
+				continue;
+			}
+			// Nothing was converted: drop the backslash and skip the command name.
+			scanning_cmd = true;
+			escaped = true; // lets a non-letter right after the backslash be output as is
+			val = val.substr(1);
+		}
+		return ret;
+	}
+
+} // anon namespace
+
+
 docstring const BibTeXInfo::getInfo() const
 {
 	if (!info_.empty())
@@ -229,7 +330,7 @@ docstring const BibTeXInfo::getInfo() const
 
 	docstring const result_str = rtrim(result.str());
 	if (!result_str.empty()) {
-		info_ = result_str;
+		info_ = convertLaTeXCommands(result_str);
 		return info_;
 	}
 
-- 
2.39.2