Convert LaTeX constructs to Unicode when reading BibTeX files.

author Richard Heck <rgheck@comcast.net>

Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)

committer Richard Heck <rgheck@comcast.net>

Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)
author Richard Heck <rgheck@comcast.net>
Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)
committer Richard Heck <rgheck@comcast.net>
Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)
diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp

index 0dcbf49fe9c471973eef34faedb352520ab20253..a3affb4d9363d2dea8a65164157618294cf6a0c7 100644 (file)
--- a/src/insets/InsetBibtex.cpp
+++ b/src/insets/InsetBibtex.cpp
@@ -529,15 +529,16 @@ namespace {
         /// the variable strings.
         /// @return true if reading was successfull (all single parts were delimited
         /// correctly)
-       bool readValue(docstring & val, ifdocstream & ifs, const VarMap & strings) {
+       bool readValue(docstring & value, ifdocstream & ifs, const VarMap & strings) {
  
                 char_type ch;
  
-               val.clear();
+               value.clear();
  
                 if (!ifs)
                         return false;
  
+               docstring val;
                 do {
                         // skip whitespace
                         do {
@@ -593,7 +594,7 @@ namespace {
                                                 lastWasWhiteSpace = false;
                                                 val += ' ';
                                         }
-                                       
+
                                         val += ch;
  
                                         // update nesting level
@@ -654,6 +655,90 @@ namespace {
  
                 ifs.putback(ch);
  
+               // Ok, we now have the value. Now we are going to go
+               // through it and replace e.g. \"a with its unicode value.
+               // We'll also strip commands, like \emph, and the like, so 
+               // it will look nice in the UI.
+               bool scanning_cmd = false;
+               bool scanning_math = false;
+               bool escaped = false; // used to catch \$, etc.
+               while (val.size()) {
+                       char_type const ch = val[0];
+
+                       // if we're scanning math, we output everything until we
+                       // find an unescaped $, at which point we break out.
+                       if (scanning_math) {
+                               if (escaped)
+                                       escaped = false;
+                               else if (ch == '\\')
+                                       escaped = true;
+                               else if (ch == '$') 
+                                       scanning_math = false;
+                               value += ch;
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // if we're scanning a command name, then we just
+                       // discard characters until we hit something that
+                       // isn't alpha.
+                       if (scanning_cmd) {
+                               if (isAlphaASCII(ch)) {
+                                       val = val.substr(1);
+                                       escaped = false;
+                                       continue;
+                               }
+                               // so we're done with this command.
+                               // now we fall through and check this character.
+                               scanning_cmd = false;
+                       }
+
+                       // was the last character a \? If so, then this is something like: \\,
+                       // or \$, so we'll just output it. That's probably not always right...
+                       if (escaped) {
+                               value += ch;
+                               val = val.substr(1);
+                               escaped = false;
+                               continue;
+                       }
+
+                       if (ch == '$') {
+                               value += ch;
+                               val = val.substr(1);
+                               scanning_math = true;
+                               continue;
+                       }
+
+                       // we just ignore braces
+                       if (ch == '{' || ch == '}') {
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // we're going to check things that look like commands, so if
+                       // this doesn't, just output it.
+                       if (ch != '\\') {
+                               value += ch;
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // ok, could be a command of some sort
+                       // let's see if it corresponds to some unicode
+                       docstring rem;
+                       docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
+                       if (!cnvtd.empty()) {
+                               // it did, so we'll take that bit and proceed with what's left
+                               value += cnvtd;
+                               val = rem;
+                               continue;
+                       }
+                       // it's a command of some sort
+                       scanning_cmd = true;
+                       escaped = true;
+                       val = val.substr(1);
+               }
+
                 return true;
         }
  }
author	Richard Heck <rgheck@comcast.net>
	Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)
committer	Richard Heck <rgheck@comcast.net>
	Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)