From bcb1654b15ee0a1efe8e4e1f7ddde4faeeae1cf4 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Tue, 9 Jan 2007 19:25:40 +0000 Subject: [PATCH] Make the output of \inputencoding commands (and the change of output encodings) more fine grained: From paragraph level to individual character level. The inputenc package has supported that since at least 2000. * src/insets/insetbase.h (latex): Document the differences between output to a string stream and a file stream * src/buffer.h (writeLaTeXSource): ditto * src/output_latex.h (latexParagraphs): ditto (switchEncoding): new function to change the encoding of a stream (and output \inputencoding commands) * src/paragraph_pimpl.C (Paragraph::Pimpl::simpleTeXSpecialChars): Adjust to latexWriteEndChanges changes * src/support/docstream.[Ch] (setEncoding, operator<<): New stream modifier that changes the encoding of file streams * src/lyxfont.[Ch] (LyXFont::latexWriteStartChanges): Change stream encoding if needed (LyXFont::latexWriteEndChanges): Change stream encoding if needed * src/lyxfont.h (public:): * src/paragraph.C (Paragraph::simpleTeXOnePar): Adjust to latexWriteStartChanges and latexWriteEndChanges changes (bool Paragraph::simpleTeXOnePar): (bool Paragraph::simpleTeXOnePar): (bool Paragraph::simpleTeXOnePar): (bool Paragraph::simpleTeXOnePar): (bool Paragraph::simpleTeXOnePar): * src/output_latex.C (TeXOnePar): Remove the ugly hack for encoding changes and use switchEncoding instead. A nice side effect is that the old hack would not work if the main language encoding is latin1 and a character would be mapped to a cedilla in the "fake ucs4" encoding, because iconv refuses to convert such a character to latin1, although it exists in latin1 (it wants to attach it to a base character). 
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16633 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/buffer.h | 22 ++++++- src/insets/insetbase.h | 7 ++- src/lyxfont.C | 12 +++- src/lyxfont.h | 7 ++- src/output_latex.C | 135 +++++++++++++++------------------------- src/output_latex.h | 13 +++- src/paragraph.C | 28 +++++---- src/paragraph_pimpl.C | 9 ++- src/support/docstream.C | 26 ++++++++ src/support/docstream.h | 19 ++++++ 10 files changed, 167 insertions(+), 111 deletions(-) diff --git a/src/buffer.h b/src/buffer.h index cdf07d68c6..98d7669eae 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -146,13 +146,31 @@ public: /// Write file. Returns \c false if unsuccesful. bool writeFile(support::FileName const &) const; - /// Just a wrapper for the method below, first creating the ofstream. + /// Just a wrapper for writeLaTeXSource, first creating the ofstream. bool makeLaTeXFile(support::FileName const & filename, std::string const & original_path, OutputParams const &, bool output_preamble = true, bool output_body = true); - /// + /** Export the buffer to LaTeX. + If \p os is a file stream, and params().inputenc == "auto", and + the buffer contains text in different languages with more than + one encoding, then this method will change the encoding + associated to \p os. Therefore you must not call this method with + a string stream if the output is supposed to go to a file. 
\code + odocfstream ofs; + ofs.open("test.tex"); + writeLaTeXSource(ofs, ...); + ofs.close(); + \endcode is NOT equivalent to \code + odocstringstream oss; + writeLaTeXSource(oss, ...); + odocfstream ofs; + ofs.open("test.tex"); + ofs << oss.str(); + ofs.close(); + \endcode + */ void writeLaTeXSource(odocstream & os, std::string const & original_path, OutputParams const &, diff --git a/src/insets/insetbase.h b/src/insets/insetbase.h index f4eb679add..4f90fc1fa1 100644 --- a/src/insets/insetbase.h +++ b/src/insets/insetbase.h @@ -369,7 +369,12 @@ public: virtual void write(Buffer const &, std::ostream &) const {} /// read inset in .lyx format virtual void read(Buffer const &, LyXLex &) {} - /// returns the number of rows (\n's) of generated tex code. + /** Export the inset to LaTeX. + * Don't use a temporary stringstream if the final output is + * supposed to go to a file. + * \sa Buffer::writeLaTeXSource for the reason. + * \return the number of rows (\n's) of generated LaTeX code. + */ virtual int latex(Buffer const &, odocstream &, OutputParams const &) const { return 0; } /// returns true to override begin and end inset in file diff --git a/src/lyxfont.C b/src/lyxfont.C index fb02fdced8..3b5e883d23 100644 --- a/src/lyxfont.C +++ b/src/lyxfont.C @@ -23,6 +23,7 @@ #include "LColor.h" #include "lyxlex.h" #include "lyxrc.h" +#include "output_latex.h" #include "support/lstrings.h" @@ -737,11 +738,13 @@ void LyXFont::lyxWriteChanges(LyXFont const & orgfont, /// Writes the head of the LaTeX needed to impose this font // Returns number of chars written. 
int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base, - LyXFont const & prev) const + LyXFont const & prev, + BufferParams const & bparams) const { - int count = 0; bool env = false; + int count = switchEncoding(os, bparams, *(prev.language()->encoding()), + *(language()->encoding())); if (language()->babel() != base.language()->babel() && language() != prev.language()) { if (isRightToLeft() != prev.isRightToLeft()) { @@ -833,7 +836,8 @@ int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base, // Returns number of chars written // This one corresponds to latexWriteStartChanges(). (Asger) int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base, - LyXFont const & next) const + LyXFont const & next, + BufferParams const & bparams) const { int count = 0; bool env = false; @@ -897,6 +901,8 @@ int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base, os << '}'; ++count; } + count += switchEncoding(os, bparams, *(language()->encoding()), + *(next.language()->encoding())); return count; } diff --git a/src/lyxfont.h b/src/lyxfont.h index b8ac551655..0cfaaf24c5 100644 --- a/src/lyxfont.h +++ b/src/lyxfont.h @@ -300,14 +300,17 @@ public: font state active now. */ int latexWriteStartChanges(odocstream &, LyXFont const & base, - LyXFont const & prev) const; + LyXFont const & prev, + BufferParams const &) const; /** Writes the tail of the LaTeX needed to change to this font. Returns number of chars written. Base is the font state we want to achieve. 
*/ int latexWriteEndChanges(odocstream &, LyXFont const & base, - LyXFont const & next) const; + LyXFont const & next, + BufferParams const &) const; + /// Build GUI description of font state docstring const stateText(BufferParams * params) const; diff --git a/src/output_latex.C b/src/output_latex.C index f7acbf5ded..59a69511c5 100644 --- a/src/output_latex.C +++ b/src/output_latex.C @@ -29,7 +29,6 @@ #include "insets/insetoptarg.h" #include "support/lstrings.h" -#include "support/unicode.h" namespace lyx { @@ -237,7 +236,7 @@ ParagraphList::const_iterator TeXOnePar(Buffer const & buf, ParagraphList const & paragraphs, ParagraphList::const_iterator pit, - odocstream & ucs4, TexRow & texrow, + odocstream & os, TexRow & texrow, OutputParams const & runparams_in, string const & everypar) { @@ -275,49 +274,42 @@ TeXOnePar(Buffer const & buf, if (!lyxrc.language_command_end.empty() && previous_language->babel() != doc_language->babel()) { - ucs4 << from_ascii(subst(lyxrc.language_command_end, + os << from_ascii(subst(lyxrc.language_command_end, "$$lang", previous_language->babel())) - << '\n'; + << '\n'; texrow.newline(); } if (lyxrc.language_command_end.empty() || language->babel() != doc_language->babel()) { - ucs4 << from_ascii(subst( + os << from_ascii(subst( lyxrc.language_command_begin, "$$lang", language->babel())) - << '\n'; + << '\n'; texrow.newline(); } } - // FIXME thailatex does not support the inputenc package, so we - // ignore switches from/to tis620-0 encoding here. This does of - // course only work as long as the non-thai text contains ASCII - // only, but it is the best we can do. 
- bool const use_thailatex = (language->encoding()->name() == "tis620-0" || - previous_language->encoding()->name() == "tis620-0"); - if (bparams.inputenc == "auto" && - language->encoding() != previous_language->encoding() && - !use_thailatex) { - ucs4 << "\\inputencoding{" - << from_ascii(language->encoding()->latexName()) - << "}\n"; + LyXFont const outerfont = + outerFont(std::distance(paragraphs.begin(), pit), + paragraphs); + // This must be identical to basefont in Paragraph::simpleTeXOnePar + LyXFont basefont = (pit->beginOfBody() > 0) ? + pit->getLabelFont(bparams, outerfont) : + pit->getLayoutFont(bparams, outerfont); + Encoding const & outer_encoding(*(outerfont.language()->encoding())); + // FIXME we switch from the outer encoding to the encoding of + // this paragraph, since I could not figure out the correct + // logic to take the encoding of the previous paragraph into + // account. This may result in some unneeded encoding changes. + if (switchEncoding(os, bparams, outer_encoding, + *(basefont.language()->encoding()))) { + os << '\n'; texrow.newline(); } - // We need to output the paragraph to a temporary stream if we - // need to change the encoding. Don't do this if the result does - // not go to a file but to the builtin source viewer. - odocstringstream par_stream; - bool const change_encoding = !runparams_in.dryrun && - bparams.inputenc == "auto" && - language->encoding() != doc_language->encoding() && - !use_thailatex; - // don't trigger the copy ctor because it's private on msvc - odocstream & os = *(change_encoding ? 
&par_stream : &ucs4); // In an inset with unlimited length (all in one row), // don't allow any special options in the paragraph @@ -376,9 +368,6 @@ TeXOnePar(Buffer const & buf, // FIXME UNICODE os << from_utf8(everypar); - LyXFont const outerfont = - outerFont(std::distance(paragraphs.begin(), pit), - paragraphs); bool need_par = pit->simpleTeXOnePar(buf, bparams, outerfont, os, texrow, runparams); @@ -478,6 +467,17 @@ TeXOnePar(Buffer const & buf, texrow.newline(); } + // FIXME we switch from the encoding of this paragraph to the + // outer encoding, since I could not figure out the correct logic + // to take the encoding of the next paragraph into account. + // This may result in some unneeded encoding changes. + basefont = pit->getLayoutFont(bparams, outerfont); + if (switchEncoding(os, bparams, *(basefont.language()->encoding()), + outer_encoding)) { + os << '\n'; + texrow.newline(); + } + // we don't need it for the last paragraph!!! // Note from JMarc: we will re-add a \n explicitely in // TeXEnvironment, because it is needed in this case @@ -490,59 +490,6 @@ TeXOnePar(Buffer const & buf, lyxerr.debugging(Debug::LATEX)) lyxerr << "TeXOnePar...done " << &*boost::next(pit) << endl; - if (change_encoding) { - lyxerr[Debug::LATEX] << "Converting paragraph to encoding " - << language->encoding()->iconvName() << endl; - docstring const par = par_stream.str(); - // Convert the paragraph to the 8bit encoding that we need to - // output. - std::vector<char> const encoded = lyx::ucs4_to_eightbit(par.c_str(), - par.size(), language->encoding()->iconvName()); - // Interpret this as if it was in the 8 bit encoding of the - // document language and convert it back to UCS4. That means - // that faked does not contain pure UCS4 anymore, but what - // will be written to the output file will be correct, because - // the real output stream will do a UCS4 -> document language - // encoding conversion. 
- // This is of course a hack, but not a bigger one than mixing - // two encodings in one file. - // FIXME: Catch iconv conversion errors and display an error - // dialog. - - // Here follows an explanation how I (gb) came to the current - // solution: - - // codecvt facets are only used by file streams -> OK, maybe - // we could use file streams and not generic streams in the - // latex() methods? No, that does not work, we use them at - // several places to write to string streams. - // Next try: Maybe we could do something else than codecvt - // in our streams, and add a setEncoding() method? That - // does not work unless we rebuild the functionality of file - // and string streams, since both odocfstream and - // odocstringstream inherit from std::basic_ostream<docstring::value_type> - // and we can neither add a method to that class nor change - // the inheritance of the file and string streams. - - // What might be possible is to encapsulate the real file and - // string streams in our own version, and use a homemade - // streambuf that would do the encoding conversion and then - // forward to the real stream. That would probably work, but - // would require far more code and a good understanding of - // stream buffers to get it right. - - // Another idea by JMarc is to use a modifier like - // os << setencoding("iso-8859-1"); - // That currently looks like the best idea. - - std::vector<char_type> const faked = lyx::eightbit_to_ucs4(&(encoded[0]), - encoded.size(), doc_language->encoding()->iconvName()); - std::vector<char_type>::const_iterator const end = faked.end(); - std::vector<char_type>::const_iterator it = faked.begin(); - for (; it != end; ++it) - ucs4.put(*it); - } - return ++pit; } @@ -646,4 +593,24 @@ void latexParagraphs(Buffer const & buf, } +int switchEncoding(odocstream & os, BufferParams const & bparams, + Encoding const & oldEnc, Encoding const & newEnc) +{ + // FIXME thailatex does not support the inputenc package, so we + // ignore switches from/to tis620-0 encoding here. 
This does of + // course only work as long as the non-thai text contains ASCII + // only, but it is the best we can do. + if (bparams.inputenc == "auto" && oldEnc.name() != newEnc.name() && + oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0") { + lyxerr[Debug::LATEX] << "Changing LaTeX encoding from " + << oldEnc.name() << " to " + << newEnc.name() << endl; + os << setEncoding(newEnc.iconvName()); + docstring const inputenc(from_ascii(newEnc.latexName())); + os << "\\inputencoding{" << inputenc << '}'; + return 16 + inputenc.length(); + } + return 0; +} + } // namespace lyx diff --git a/src/output_latex.h b/src/output_latex.h index 185b737d20..5544a63356 100644 --- a/src/output_latex.h +++ b/src/output_latex.h @@ -20,11 +20,16 @@ namespace lyx { class Buffer; +class BufferParams; +class Encoding; class OutputParams; class TexRow; -/// Just a wrapper for the method below, first creating the ofstream. - +/** Export \p paragraphs of buffer \p buf to LaTeX. + Don't use a temporary stringstream for \p os if the final output is + supposed to go to a file. + \sa Buffer::writeLaTeXSource for the reason. + */ void latexParagraphs(Buffer const & buf, ParagraphList const & paragraphs, odocstream & ofs, @@ -32,6 +37,10 @@ void latexParagraphs(Buffer const & buf, OutputParams const &, std::string const & everypar = std::string()); +/// Switch the encoding of \p os from \p oldEnc to \p newEnc if needed. +/// \return the number of characters written to \p os. 
+int switchEncoding(odocstream & os, BufferParams const & bparams, + Encoding const & oldEnc, Encoding const & newEnc); } // namespace lyx diff --git a/src/paragraph.C b/src/paragraph.C index 7a9a79e5e5..9f5083961f 100644 --- a/src/paragraph.C +++ b/src/paragraph.C @@ -761,7 +761,7 @@ int adjust_column_count(string const & str, int oldcol) // This could go to ParagraphParameters if we want to int Paragraph::startTeXParParams(BufferParams const & bparams, - odocstream & os, bool moving_arg) const + odocstream & os, bool moving_arg) const { int column = 0; @@ -825,7 +825,7 @@ int Paragraph::startTeXParParams(BufferParams const & bparams, // This could go to ParagraphParameters if we want to int Paragraph::endTeXParParams(BufferParams const & bparams, - odocstream & os, bool moving_arg) const + odocstream & os, bool moving_arg) const { int column = 0; @@ -913,6 +913,7 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, // As long as we are in the label, this font is the base font of the // label. Before the first body character it is set to the base font // of the body. + // This must be identical to basefont in TeXOnePar(). LyXFont basefont; LaTeXFeatures features(buf, bparams, runparams); @@ -964,7 +965,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, if (i == body_pos) { if (body_pos > 0) { if (open_font) { - column += running_font.latexWriteEndChanges(os, basefont, basefont); + column += running_font.latexWriteEndChanges( + os, basefont, basefont, bparams); open_font = false; } basefont = getLayoutFont(bparams, outerfont); @@ -1004,9 +1006,10 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, (font != running_font || font.language() != running_font.language())) { - column += running_font.latexWriteEndChanges(os, - basefont, - (i == body_pos-1) ? basefont : font); + column += running_font.latexWriteEndChanges( + os, basefont, + (i == body_pos-1) ? 
basefont : font, + bparams); running_font = basefont; open_font = false; } @@ -1025,8 +1028,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, font.language() != running_font.language()) && i != body_pos - 1) { - column += font.latexWriteStartChanges(os, basefont, - last_font); + column += font.latexWriteStartChanges( + os, basefont, last_font, bparams); running_font = font; open_font = true; } @@ -1062,11 +1065,11 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, if (next_) { running_font .latexWriteEndChanges(os, basefont, - next_->getFont(bparams, - 0, outerfont)); + next_->getFont(bparams, 0, outerfont), + bparams); } else { running_font.latexWriteEndChanges(os, basefont, - basefont); + basefont, bparams); } #else #ifdef WITH_WARNINGS @@ -1074,7 +1077,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf, //#warning there as we start another \selectlanguage with the next paragraph if //#warning we are in need of this. This should be fixed sometime (Jug) #endif - running_font.latexWriteEndChanges(os, basefont, basefont); + running_font.latexWriteEndChanges(os, basefont, basefont, + bparams); #endif } diff --git a/src/paragraph_pimpl.C b/src/paragraph_pimpl.C index 86028d31aa..1a8cbfef35 100644 --- a/src/paragraph_pimpl.C +++ b/src/paragraph_pimpl.C @@ -483,7 +483,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf, os << '\n'; } else { if (open_font) { - column += running_font.latexWriteEndChanges(os, basefont, basefont); + column += running_font.latexWriteEndChanges( + os, basefont, basefont, bparams); open_font = false; } basefont = owner_->getLayoutFont(bparams, outerfont); @@ -536,10 +537,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf, #endif // some insets cannot be inside a font change command if (open_font && inset->noFontChange()) { - column +=running_font. 
- latexWriteEndChanges(os, - basefont, - basefont); + column += running_font.latexWriteEndChanges( + os, basefont, basefont, bparams); open_font = false; basefont = owner_->getLayoutFont(bparams, outerfont); running_font = basefont; diff --git a/src/support/docstream.C b/src/support/docstream.C index 9f0b70187c..ac9a8ed230 100644 --- a/src/support/docstream.C +++ b/src/support/docstream.C @@ -294,6 +294,32 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode, open(s, mode); } + +SetEnc setEncoding(string const & encoding) +{ + return SetEnc(encoding); +} + + +odocstream & operator<<(odocstream & os, SetEnc e) +{ + if (std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc())) { + // This stream must be a file stream, since we never imbue + // any other stream with a locale having a iconv_codecvt_facet. + // Flush the stream so that all pending output is written + // with the old encoding. + os.flush(); + std::locale locale(os.rdbuf()->getloc(), + new iconv_codecvt_facet(e.encoding, std::ios_base::out)); + // FIXME Does changing the codecvt facet of an open file + // stream always work? It does with gcc 4.1, but I have read + // somewhere that it does not with MSVC. + // What does the standard say? + os.imbue(locale); + } + return os; +} + } #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__) diff --git a/src/support/docstream.h b/src/support/docstream.h index 1783fd4271..c3db04c58a 100644 --- a/src/support/docstream.h +++ b/src/support/docstream.h @@ -77,6 +77,25 @@ odocstream & operator<<(odocstream & os, char c) return os; } +/// Helper struct for changing stream encoding +struct SetEnc { + SetEnc(std::string const & e) : encoding(e) {} + std::string encoding; +}; + +/// Helper function for changing stream encoding +SetEnc setEncoding(std::string const & encoding); + +/** Change the encoding of \p os to \p e.encoding. + \p e.encoding must be a valid iconv name of an 8bit encoding. 
+ This does nothing if the stream is not a file stream, since only + file streams do have an associated 8bit encoding. + Usage: \code + os << setEncoding("ISO-8859-1"); + \endcode + */ +odocstream & operator<<(odocstream & os, SetEnc e); + } #endif -- 2.39.2