git.lyx.org Git - features.git/commitdiff
Convert LaTeX constructs to Unicode when reading BibTeX files.
author Richard Heck <rgheck@comcast.net>
Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)
committer Richard Heck <rgheck@comcast.net>
Mon, 29 Dec 2008 00:10:41 +0000 (00:10 +0000)
Also, ignore other LaTeX commands.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27975 a592a061-630c-0410-9148-cb99ea01b6c8

src/insets/InsetBibtex.cpp

index 0dcbf49fe9c471973eef34faedb352520ab20253..a3affb4d9363d2dea8a65164157618294cf6a0c7 100644 (file)
@@ -529,15 +529,16 @@ namespace {
        /// the variable strings.
        /// @return true if reading was successfull (all single parts were delimited
        /// correctly)
-       bool readValue(docstring & val, ifdocstream & ifs, const VarMap & strings) {
+       bool readValue(docstring & value, ifdocstream & ifs, const VarMap & strings) {
 
                char_type ch;
 
-               val.clear();
+               value.clear();
 
                if (!ifs)
                        return false;
 
+               docstring val;
                do {
                        // skip whitespace
                        do {
@@ -593,7 +594,7 @@ namespace {
                                                lastWasWhiteSpace = false;
                                                val += ' ';
                                        }
-                                       
+
                                        val += ch;
 
                                        // update nesting level
@@ -654,6 +655,90 @@ namespace {
 
                ifs.putback(ch);
 
+               // Ok, we now have the value. Now we are going to go
+               // through it and replace e.g. \"a with its unicode value.
+               // We'll also strip commands, like \emph, and the like, so 
+               // it will look nice in the UI.
+               bool scanning_cmd = false;
+               bool scanning_math = false;
+               bool escaped = false; // used to catch \$, etc.
+               while (val.size()) {
+                       char_type const ch = val[0];
+
+                       // if we're scanning math, we output everything until we
+                       // find an unescaped $, at which point we break out.
+                       if (scanning_math) {
+                               if (escaped)
+                                       escaped = false;
+                               else if (ch == '\\')
+                                       escaped = true;
+                               else if (ch == '$') 
+                                       scanning_math = false;
+                               value += ch;
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // if we're scanning a command name, then we just
+                       // discard characters until we hit something that
+                       // isn't alpha.
+                       if (scanning_cmd) {
+                               if (isAlphaASCII(ch)) {
+                                       val = val.substr(1);
+                                       escaped = false;
+                                       continue;
+                               }
+                               // so we're done with this command.
+                               // now we fall through and check this character.
+                               scanning_cmd = false;
+                       }
+
+                       // was the last character a \? If so, then this is something like: \\,
+                       // or \$, so we'll just output it. That's probably not always right...
+                       if (escaped) {
+                               value += ch;
+                               val = val.substr(1);
+                               escaped = false;
+                               continue;
+                       }
+
+                       if (ch == '$') {
+                               value += ch;
+                               val = val.substr(1);
+                               scanning_math = true;
+                               continue;
+                       }
+
+                       // we just ignore braces
+                       if (ch == '{' || ch == '}') {
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // we're going to check things that look like commands, so if
+                       // this doesn't, just output it.
+                       if (ch != '\\') {
+                               value += ch;
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // ok, could be a command of some sort
+                       // let's see if it corresponds to some unicode
+                       docstring rem;
+                       docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
+                       if (!cnvtd.empty()) {
+                               // it did, so we'll take that bit and proceed with what's left
+                               value += cnvtd;
+                               val = rem;
+                               continue;
+                       }
+                       // it's a command of some sort
+                       scanning_cmd = true;
+                       escaped = true;
+                       val = val.substr(1);
+               }
+
                return true;
        }
 }