Fix output of labels and references that contain characters with a UCS4 code point above 255

author Georg Baum <Georg.Baum@post.rwth-aachen.de>

Sun, 12 Nov 2006 13:42:20 +0000 (13:42 +0000)

committer Georg Baum <Georg.Baum@post.rwth-aachen.de>

Sun, 12 Nov 2006 13:42:20 +0000 (13:42 +0000)
author Georg Baum <Georg.Baum@post.rwth-aachen.de>
Sun, 12 Nov 2006 13:42:20 +0000 (13:42 +0000)
committer Georg Baum <Georg.Baum@post.rwth-aachen.de>
Sun, 12 Nov 2006 13:42:20 +0000 (13:42 +0000)
diff --git a/lib/lyx2lyx/lyx_1_4.py b/lib/lyx2lyx/lyx_1_4.py

index 039851b84f3ea1ff37e71046db99bacb09b36aa2..5d35f1d0232f66ac84585e6f5ddf6c389c19ee90 100644 (file)
--- a/lib/lyx2lyx/lyx_1_4.py
+++ b/lib/lyx2lyx/lyx_1_4.py
@@ -202,7 +202,7 @@ def revert_space_names(document):
  
  
  def lyx_support_escape(lab):
  
  
  def lyx_support_escape(lab):
-    " Equivalent to lyx::support::escape()"
+    " Equivalent to pre-unicode lyx::support::escape()"
      hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7',
                  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
      enc = ""
      hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7',
                  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
      enc = ""
diff --git a/src/support/lstrings.C b/src/support/lstrings.C

index 939d48531b78e621422d5a2a2ee8bc2fad22d9e9..9761da6d34ba6add3551cc7c2177020a49992b4e 100644 (file)
--- a/src/support/lstrings.C
+++ b/src/support/lstrings.C
@@ -702,21 +702,27 @@ string const rsplit(string const & a, string & piece, char delim)
  }
  
  
  }
  
  
-// This function escapes 8-bit characters and other problematic
-// characters that cause problems in latex labels.
  docstring const escape(docstring const & lab)
  {
  docstring const escape(docstring const & lab)
  {
-       lyx::char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
-                             '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+       char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+                                  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
         docstring enc;
         for (docstring::size_type i = 0; i < lab.length(); ++i) {
         docstring enc;
         for (docstring::size_type i = 0; i < lab.length(); ++i) {
-               lyx::char_type c = lab[i];
-               // FIXME We must change the following algorithm for UCS4
-               // chars, but that will be a file format change.
+               char_type c = lab[i];
                 if (c >= 128 || c == '=' || c == '%') {
                 if (c >= 128 || c == '=' || c == '%') {
+                       // Although char_type is a 32 bit type we know that
+                       // UCS4 occupies only 21 bits, so we don't need to
+                       // encode bigger values. Test for 2^24 because we
+                       // can encode that with the 6 hex digits that are
+                       // needed for 21 bits anyway.
+                       BOOST_ASSERT(c < (1 << 24));
                         enc += '=';
                         enc += '=';
-                       enc += hexdigit[c>>4];
-                       enc += hexdigit[c & 15];
+                       enc += hexdigit[(c>>20) & 15];
+                       enc += hexdigit[(c>>16) & 15];
+                       enc += hexdigit[(c>>12) & 15];
+                       enc += hexdigit[(c>> 8) & 15];
+                       enc += hexdigit[(c>> 4) & 15];
+                       enc += hexdigit[ c      & 15];
                 } else {
                         enc += c;
                 }
                 } else {
                         enc += c;
                 }
diff --git a/src/support/lstrings.h b/src/support/lstrings.h

index 16288e6f44350e59b76ae653bbba5014bee6394f..21d57eeb91380727a1cbe65a4a8d1c299477170e 100644 (file)
--- a/src/support/lstrings.h
+++ b/src/support/lstrings.h
@@ -216,7 +216,8 @@ std::string const split(std::string const & a, char delim);
  /// Same as split but uses the last delim.
  std::string const rsplit(std::string const & a, std::string & piece, char delim);
  
  /// Same as split but uses the last delim.
  std::string const rsplit(std::string const & a, std::string & piece, char delim);
  
-/// Escapes non ASCII chars
+/// Escapes non ASCII chars and other problematic characters that cause
+/// problems in latex labels.
  docstring const escape(docstring const & lab);
  
  /// gives a vector of stringparts which have the delimiter delim
  docstring const escape(docstring const & lab);
  
  /// gives a vector of stringparts which have the delimiter delim
author	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Sun, 12 Nov 2006 13:42:20 +0000 (13:42 +0000)
committer	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Sun, 12 Nov 2006 13:42:20 +0000 (13:42 +0000)
lib/lyx2lyx/lyx_1_4.py		patch | blob | history
src/support/lstrings.C		patch | blob | history
src/support/lstrings.h		patch | blob | history