From 8a4b25875ad32ffcd4013afbcd22bc15ce84148d Mon Sep 17 00:00:00 2001 From: Enrico Forestieri Date: Sun, 23 Oct 2011 14:24:17 +0000 Subject: [PATCH] Thinking about it, it is better to always convert all utf8 strings to docstrings, and not only when they contain encoding changes. This is because if the output encoding was previously changed and a utf8 string is inserted in the stream, the encoding changes cannot occur. This was not a problem until now because normal strings could not be inserted in an odocstream, as they would have been mistaken for encoding changes. Indeed, the SetEnc struct has only a std::string member, and outputting a std::string would be interpreted by the compiler the same as inserting setEncoding(std::string). However, a std::string can be inserted in an otexstream, and it is better to account for this. I wonder whether trying "os << std::string", where os is an odocstream, should produce an error instead of actually trying to change the stream output encoding, but this has not been a problem until now... 
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@39944 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/support/docstream.cpp | 66 ++------------------------------------- 1 file changed, 2 insertions(+), 64 deletions(-) diff --git a/src/support/docstream.cpp b/src/support/docstream.cpp index 9edd9f27c6..e8839e07f6 100644 --- a/src/support/docstream.cpp +++ b/src/support/docstream.cpp @@ -512,76 +512,14 @@ otexstream & operator<<(otexstream & ots, docstring const & s) otexstream & operator<<(otexstream & ots, string const & s) { - ots << s.c_str(); + ots << from_utf8(s); return ots; } -namespace { - -int findToken(char const * s, char const * search_token) -{ - char const * token = strstr(s, search_token); - if (token) - return token - s; - return -1; -} - -} // namespace anon - - otexstream & operator<<(otexstream & ots, char const * s) { - size_t const len = strlen(s); - - // Check whether there's something to output - if (len == 0) - return ots; - - if (ots.protectSpace()) { - if (!ots.canBreakLine() && s[0] == ' ') - ots.os() << "{}"; - ots.protectSpace(false); - } - - char const * start_token = "\xf3\xb0\x80\x80"; - char const * end_token = "\xf3\xb0\x80\x81"; - - int i = findToken(s, start_token); - - if (i >= 0) { - // Some encoding changes for the underlying stream are embedded - // in the string. The encoding names to be used are enclosed - // between the code points 0xF0000 and xF0001 (0xf3b08080 and - // 0xf3b08081 in utf8 encoding). These code points belong to - // the plane 15 Private Use Area and have no associated glyph. 
- string s1(s, i); - char const * s2 = s + i + 4; - while (true) { - if (!s1.empty()) - ots.os() << from_utf8(s1); - if (s2[0] == '\0') - break; - i = findToken(s2, end_token); - if (i >= 0) { - ots.os() << setEncoding(string(s2, i)); - s2 += i + 4; - } - i = findToken(s2, start_token); - if (i >= 0) { - s1 = string(s2, i); - s2 += i + 4; - } else { - s1 = s2; - s2 += strlen(s2); - } - } - } else - ots.os() << s; - - ots.lastChar(s[len - 1]); - ots.texrow().newlines(count(s, s + len, '\n')); - ots.canBreakLine(s[len - 1] != '\n'); + ots << from_utf8(s); return ots; } -- 2.39.2