We'll do the unicode conversion where it's actually needed: where we

author Richard Heck <rgheck@comcast.net>

Fri, 16 Jan 2009 23:42:16 +0000 (23:42 +0000)

committer Richard Heck <rgheck@comcast.net>

Fri, 16 Jan 2009 23:42:16 +0000 (23:42 +0000)
author Richard Heck <rgheck@comcast.net>
Fri, 16 Jan 2009 23:42:16 +0000 (23:42 +0000)
committer Richard Heck <rgheck@comcast.net>
Fri, 16 Jan 2009 23:42:16 +0000 (23:42 +0000)
diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp

index 7251649f5a8bee6b07d1612ab6d8e108c50a8592..1e32445891f00a0a8ea4dbe3ef9a48a7acc8c9f5 100644 (file)
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -16,6 +16,7 @@
  #include "Buffer.h"
  #include "BufferParams.h"
  #include "buffer_funcs.h"
+#include "Encoding.h"
  #include "InsetIterator.h"
  #include "Paragraph.h"
  
@@ -45,7 +46,7 @@ namespace lyx {
  //////////////////////////////////////////////////////////////////////
  
  BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
-       : is_bibtex_(true), bib_key_(key), entry_type_(type)
+       : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
  {}
  
  
@@ -173,6 +174,106 @@ docstring const BibTeXInfo::getYear() const
  }
  
  
+namespace {
+
+       docstring convertLaTeXCommands(docstring const & str)
+       {
+               docstring val = str;
+               docstring ret;
+       
+               bool scanning_cmd = false;
+               bool scanning_math = false;
+               bool escaped = false; // used to catch \$, etc.
+               while (val.size()) {
+                       char_type const ch = val[0];
+
+                       // if we're scanning math, we output everything until we
+                       // find an unescaped $, at which point we break out.
+                       if (scanning_math) {
+                               if (escaped)
+                                       escaped = false;
+                               else if (ch == '\\')
+                                       escaped = true;
+                               else if (ch == '$') 
+                                       scanning_math = false;
+                               ret += ch;
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // if we're scanning a command name, then we just
+                       // discard characters until we hit something that
+                       // isn't alpha.
+                       if (scanning_cmd) {
+                               if (isAlphaASCII(ch)) {
+                                       val = val.substr(1);
+                                       escaped = false;
+                                       continue;
+                               }
+                               // so we're done with this command.
+                               // now we fall through and check this character.
+                               scanning_cmd = false;
+                       }
+
+                       // was the last character a \? If so, then this is something like: \\,
+                       // or \$, so we'll just output it. That's probably not always right...
+                       if (escaped) {
+                               ret += ch;
+                               val = val.substr(1);
+                               escaped = false;
+                               continue;
+                       }
+
+                       if (ch == '$') {
+                               ret += ch;
+                               val = val.substr(1);
+                               scanning_math = true;
+                               continue;
+                       }
+
+                       // we just ignore braces
+                       if (ch == '{' || ch == '}') {
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // we're going to check things that look like commands, so if
+                       // this doesn't, just output it.
+                       if (ch != '\\') {
+                               ret += ch;
+                               val = val.substr(1);
+                               continue;
+                       }
+
+                       // ok, could be a command of some sort
+                       // let's see if it corresponds to some unicode
+                       // unicodesymbols has things in the form: \"{u},
+                       // whereas we may see things like: \"u. So we'll
+                       // look for that and change it, if necessary.
+                       static boost::regex const reg("^\\\\\\W\\w");
+                       if (boost::regex_search(to_utf8(val), reg)) {
+                               val.insert(3, from_ascii("}"));
+                               val.insert(2, from_ascii("{"));
+                       }
+                       docstring rem;
+                       docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
+                       if (!cnvtd.empty()) {
+                               // it did, so we'll take that bit and proceed with what's left
+                               ret += cnvtd;
+                               val = rem;
+                               continue;
+                       }
+                       // it's a command of some sort
+                       scanning_cmd = true;
+                       escaped = true;
+                       val = val.substr(1);
+               }
+               return ret;
+       }
+
+} // anon namespace
+
+
  docstring const BibTeXInfo::getInfo() const
  {
         if (!info_.empty())
@@ -229,7 +330,7 @@ docstring const BibTeXInfo::getInfo() const
  
         docstring const result_str = rtrim(result.str());
         if (!result_str.empty()) {
-               info_ = result_str;
+               info_ = convertLaTeXCommands(result_str);
                 return info_;
         }
author	Richard Heck <rgheck@comcast.net>
	Fri, 16 Jan 2009 23:42:16 +0000 (23:42 +0000)
committer	Richard Heck <rgheck@comcast.net>
	Fri, 16 Jan 2009 23:42:16 +0000 (23:42 +0000)