X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Fdocstream.cpp;h=e8839e07f61420104c3ff49826f7dc4997123ce6;hb=faa87bf9f30b943397429a04254d96963bbf38bc;hp=682aa64d54d47d85f1a7d8d0908be3b09ce39624;hpb=0a2b62f468a95fba1cc269fa39c95e67fc93fab5;p=lyx.git diff --git a/src/support/docstream.cpp b/src/support/docstream.cpp index 682aa64d54..e8839e07f6 100644 --- a/src/support/docstream.cpp +++ b/src/support/docstream.cpp @@ -11,8 +11,10 @@ #include #include "support/docstream.h" +#include "support/lstrings.h" #include "support/unicode.h" +#include #include #include #include @@ -22,6 +24,14 @@ using namespace std; using lyx::ucs4_codeset; +using lyx::support::contains; +using lyx::support::split; + + +#if defined(_MSC_VER) && (_MSC_VER >= 1600) +std::locale::id numpunct::id; +#endif + namespace { @@ -242,33 +252,7 @@ protected: } virtual int do_max_length() const throw() { - // FIXME: this information should be transferred to lib/encodings - // UTF8 uses at most 4 bytes to represent one UCS4 code point - // (see RFC 3629). RFC 2279 specifies 6 bytes, but that - // information is outdated, and RFC 2279 has been superseded by - // RFC 3629. - // The CJK encodings use (different) multibyte representation as well. - // All other encodings encode one UCS4 code point in one byte - // (and can therefore only encode a subset of UCS4) - // Note that BIG5 and SJIS do not work with LaTeX (see lib/encodings). - // Furthermore, all encodings that use shifting (like SJIS) do not work with - // iconv_codecvt_facet. - if (encoding_ == "UTF-8" || - encoding_ == "GB" || - encoding_ == "EUC-TW") - return 4; - else if (encoding_ == "EUC-JP") - return 3; - else if (encoding_ == "ISO-2022-JP") - return 5; - else if (encoding_ == "BIG5" || - encoding_ == "EUC-KR" || - encoding_ == "EUC-CN" || - encoding_ == "SJIS" || - encoding_ == "GBK") - return 2; - else - return 1; + return lyx::max_encoded_bytes(encoding_); } private: /// Do the actual conversion. 
The interface is equivalent to that of @@ -281,6 +265,14 @@ private: inbytesleft, to, outbytesleft); if (converted == (size_t)(-1)) { switch(errno) { + case 0: + // As strange as it may seem, this + // does happen on windows when parsing + // comments with accented chars in + // tex2lyx. See the following thread + // for details + // http://thread.gmane.org/gmane.editors.lyx.devel/117636 + break; case EINVAL: case E2BIG: return base::partial; @@ -320,9 +312,9 @@ const char * iconv_codecvt_facet_exception::what() const throw() } -ifdocstream::ifdocstream(string const & encoding) : base() +ifdocstream::ifdocstream() : base() { - setEncoding(*this, encoding, in); + setEncoding(*this, "UTF-8", in); } @@ -413,6 +405,157 @@ odocstream & operator<<(odocstream & os, char c) } #endif + +void otexstream::put(char_type const & c) +{ + if (protectspace_) { + if (!canbreakline_ && c == ' ') + os_ << "{}"; + protectspace_ = false; + } + os_.put(c); + lastchar_ = c; + if (c == '\n') { + texrow_.newline(); + canbreakline_ = false; + } else + canbreakline_ = true; +} + + +BreakLine breakln; +SafeBreakLine safebreakln; + + +otexstream & operator<<(otexstream & ots, BreakLine) +{ + if (ots.canBreakLine()) { + ots.os().put('\n'); + ots.lastChar('\n'); + ots.canBreakLine(false); + ots.texrow().newline(); + } + ots.protectSpace(false); + return ots; +} + + +otexstream & operator<<(otexstream & ots, SafeBreakLine) +{ + if (ots.canBreakLine()) { + ots.os() << "%\n"; + ots.lastChar('\n'); + ots.canBreakLine(false); + ots.texrow().newline(); + } + ots.protectSpace(false); + return ots; +} + + +otexstream & operator<<(otexstream & ots, odocstream_manip pf) +{ + ots.os() << pf; + if (pf == static_cast<odocstream_manip>(endl)) { + ots.lastChar('\n'); + ots.texrow().newline(); + } + return ots; +} + + +otexstream & operator<<(otexstream & ots, docstring const & s) +{ + size_t const len = s.length(); + + // Check whether there's something to output + if (len == 0) + return ots; + + if (ots.protectSpace()) { + if
(!ots.canBreakLine() && s[0] == ' ') + ots.os() << "{}"; + ots.protectSpace(false); + } + + if (contains(s, 0xF0000)) { + // Some encoding changes for the underlying stream are embedded + // in the docstring. The encoding names to be used are enclosed + // between the code points 0xF0000 and 0xF0001, the first two + // characters of plane 15, which is a Private Use Area whose + // codepoints don't have any associated glyph. + docstring s1; + docstring s2 = split(s, s1, 0xF0000); + while (true) { + if (!s1.empty()) + ots.os() << s1; + if (s2.empty()) + break; + docstring enc; + docstring const s3 = split(s2, enc, 0xF0001); + if (!contains(s2, 0xF0001)) + s2 = split(enc, s1, 0xF0000); + else { + ots.os() << setEncoding(to_ascii(enc)); + s2 = split(s3, s1, 0xF0000); + } + } + } else + ots.os() << s; + + ots.lastChar(s[len - 1]); + ots.texrow().newlines(count(s.begin(), s.end(), '\n')); + ots.canBreakLine(s[len - 1] != '\n'); + return ots; +} + + +otexstream & operator<<(otexstream & ots, string const & s) +{ + ots << from_utf8(s); + return ots; +} + + +otexstream & operator<<(otexstream & ots, char const * s) +{ + ots << from_utf8(s); + return ots; +} + + +otexstream & operator<<(otexstream & ots, char c) +{ + if (ots.protectSpace()) { + if (!ots.canBreakLine() && c == ' ') + ots.os() << "{}"; + ots.protectSpace(false); + } + ots.os() << c; + ots.lastChar(c); + if (c == '\n') + ots.texrow().newline(); + ots.canBreakLine(c != '\n'); + return ots; +} + + +template <typename Type> +otexstream & operator<<(otexstream & ots, Type value) +{ + ots.os() << value; + ots.lastChar(0); + ots.canBreakLine(true); + ots.protectSpace(false); + return ots; +} + +template otexstream & operator<< <SetEnc>(otexstream & os, SetEnc); +template otexstream & operator<< <double>(otexstream &, double); +template otexstream & operator<< <int>(otexstream &, int); +template otexstream & operator<< <unsigned int>(otexstream &, unsigned int); +template otexstream & operator<< <unsigned long>(otexstream &, unsigned long); + } #if !
defined(USE_WCHAR_T) && defined(__GNUC__)