codepoint > 255.
I decided that this does not require a file format change: we already had the
change to format 249 that allowed Unicode in .lyx files. The .tex output of
non-ASCII characters is now different from before (even for those that were
supported previously, e.g. German umlauts in latin1), but this is only
relevant if people referenced a label in ERT. Since we cannot detect this
anyway, we don't need a file format change.
* src/support/lstrings.C
(escape): Extend the escaping algorithm from 8 bit to 24 bit.
* src/support/lstrings.h
(escape): Update comment
* lib/lyx2lyx/lyx_1_4.py
(lyx_support_escape): Update comment
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15883 a592a061-630c-0410-9148-cb99ea01b6c8
def lyx_support_escape(lab):
def lyx_support_escape(lab):
- " Equivalent to lyx::support::escape()"
+ " Equivalent to pre-unicode lyx::support::escape()"
hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
enc = ""
hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
enc = ""
-// This function escapes 8-bit characters and other problematic
-// characters that cause problems in latex labels.
docstring const escape(docstring const & lab)
{
docstring const escape(docstring const & lab)
{
- lyx::char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+ char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
docstring enc;
for (docstring::size_type i = 0; i < lab.length(); ++i) {
docstring enc;
for (docstring::size_type i = 0; i < lab.length(); ++i) {
- lyx::char_type c = lab[i];
- // FIXME We must change the following algorithm for UCS4
- // chars, but that will be a file format change.
if (c >= 128 || c == '=' || c == '%') {
if (c >= 128 || c == '=' || c == '%') {
+ // Although char_type is a 32 bit type we know that
+ // UCS4 occupies only 21 bits, so we don't need to
+ // encode bigger values. Test for 2^24 because we
+ // can encode that with the 6 hex digits that are
+ // needed for 21 bits anyway.
+ BOOST_ASSERT(c < (1 << 24));
- enc += hexdigit[c>>4];
- enc += hexdigit[c & 15];
+ enc += hexdigit[(c>>20) & 15];
+ enc += hexdigit[(c>>16) & 15];
+ enc += hexdigit[(c>>12) & 15];
+ enc += hexdigit[(c>> 8) & 15];
+ enc += hexdigit[(c>> 4) & 15];
+ enc += hexdigit[ c & 15];
/// Same as split but uses the last delim.
std::string const rsplit(std::string const & a, std::string & piece, char delim);
/// Same as split but uses the last delim.
std::string const rsplit(std::string const & a, std::string & piece, char delim);
-/// Escapes non ASCII chars
+/// Escapes non ASCII chars and other problematic characters that cause
+/// problems in latex labels.
docstring const escape(docstring const & lab);
/// gives a vector of stringparts which have the delimiter delim
docstring const escape(docstring const & lab);
/// gives a vector of stringparts which have the delimiter delim