]> git.lyx.org Git - features.git/commitdiff
Make the output of \inputencoding commands (and the change of output
authorGeorg Baum <Georg.Baum@post.rwth-aachen.de>
Tue, 9 Jan 2007 19:25:40 +0000 (19:25 +0000)
committerGeorg Baum <Georg.Baum@post.rwth-aachen.de>
Tue, 9 Jan 2007 19:25:40 +0000 (19:25 +0000)
encodings) more fine grained: From paragraph level to individual character
level. The inputenc package supports that since at least 2000.

* src/insets/insetbase.h
(latex): Document the differences between output to a string stream
and a file stream

* src/buffer.h
(writeLaTeXSource): ditto

* src/output_latex.h
(latexParagraphs): ditto
(switchEncoding): new function to change the encoding of a stream
(and output \inputencoding commands)

* src/paragraph_pimpl.C
(Paragraph::Pimpl::simpleTeXSpecialChars): Adjust to latexWriteEndChanges
changes

* src/support/docstream.[Ch]
(setEncoding, operator<<): New stream modifier that changes the
encoding of file streams

* src/lyxfont.[Ch]
(LyXFont::latexWriteStartChanges): Change stream encoding if needed
(LyXFont::latexWriteEndChanges): Change stream encoding if needed

* src/lyxfont.h
(public:):

* src/paragraph.C
(Paragraph::simpleTeXOnePar): Adjust to latexWriteStartChanges and latexWriteEndChanges changes
(bool Paragraph::simpleTeXOnePar):
(bool Paragraph::simpleTeXOnePar):
(bool Paragraph::simpleTeXOnePar):
(bool Paragraph::simpleTeXOnePar):
(bool Paragraph::simpleTeXOnePar):

* src/output_latex.C
(TeXOnePar): Remove the ugly hack for encoding changes and use
switchEncoding instead. A nice side effect is that the old hack would
not work if the main language encoding is latin1 and a character
would be mapped to a cedilla in the "fake ucs4" encoding, because
iconv refuses to convert such a character to latin1, although it
exists in latin1 (it wants to attach it to a base character).

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16633 a592a061-630c-0410-9148-cb99ea01b6c8

src/buffer.h
src/insets/insetbase.h
src/lyxfont.C
src/lyxfont.h
src/output_latex.C
src/output_latex.h
src/paragraph.C
src/paragraph_pimpl.C
src/support/docstream.C
src/support/docstream.h

index cdf07d68c6fb862d99b807792354b4cb8f66fc76..98d7669eae4a6de0f6e7b5e00948ce7adc265aac 100644 (file)
@@ -146,13 +146,31 @@ public:
        /// Write file. Returns \c false if unsuccesful.
        bool writeFile(support::FileName const &) const;
 
-       /// Just a wrapper for the method below, first creating the ofstream.
+       /// Just a wrapper for writeLaTeXSource, first creating the ofstream.
        bool makeLaTeXFile(support::FileName const & filename,
                           std::string const & original_path,
                           OutputParams const &,
                           bool output_preamble = true,
                           bool output_body = true);
-       ///
+       /** Export the buffer to LaTeX.
+           If \p os is a file stream, and params().inputenc == "auto", and
+           the buffer contains text in different languages with more than
+           one encoding, then this method will change the encoding
+           associated to \p os. Therefore you must not call this method with
+           a string stream if the output is supposed to go to a file. \code
+           odocfstream ofs;
+           ofs.open("test.tex");
+           writeLaTeXSource(ofs, ...);
+           ofs.close();
+           \endcode is NOT equivalent to \code
+           odocstringstream oss;
+           writeLaTeXSource(oss, ...);
+           odocfstream ofs;
+           ofs.open("test.tex");
+           ofs << oss.str();
+           ofs.close();
+           \endcode
+        */
        void writeLaTeXSource(odocstream & os,
                           std::string const & original_path,
                           OutputParams const &,
index f4eb679add7c2e610555d675af67ce5ab2c7d251..4f90fc1fa141da4e4da4c0439cb6ef260c3f2cbd 100644 (file)
@@ -369,7 +369,12 @@ public:
        virtual void write(Buffer const &, std::ostream &) const {}
        /// read inset in .lyx format
        virtual void read(Buffer const &, LyXLex &) {}
-       /// returns the number of rows (\n's) of generated tex code.
+       /** Export the inset to LaTeX.
+        *  Don't use a temporary stringstream if the final output is
+        *  supposed to go to a file.
+        *  \sa Buffer::writeLaTeXSource for the reason.
+        *  \return the number of rows (\n's) of generated LaTeX code.
+        */
        virtual int latex(Buffer const &, odocstream &,
                          OutputParams const &) const { return 0; }
        /// returns true to override begin and end inset in file
index fb02fdced841b16cec9b8cb459cc98b659a59982..3b5e883d23ebe492690d6efdac707c37bd1cda4b 100644 (file)
@@ -23,6 +23,7 @@
 #include "LColor.h"
 #include "lyxlex.h"
 #include "lyxrc.h"
+#include "output_latex.h"
 
 #include "support/lstrings.h"
 
@@ -737,11 +738,13 @@ void LyXFont::lyxWriteChanges(LyXFont const & orgfont,
 /// Writes the head of the LaTeX needed to impose this font
 // Returns number of chars written.
 int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base,
-                                   LyXFont const & prev) const
+                                    LyXFont const & prev,
+                                    BufferParams const & bparams) const
 {
-       int count = 0;
        bool env = false;
 
+       int count = switchEncoding(os, bparams, *(prev.language()->encoding()),
+                                  *(language()->encoding()));
        if (language()->babel() != base.language()->babel() &&
            language() != prev.language()) {
                if (isRightToLeft() != prev.isRightToLeft()) {
@@ -833,7 +836,8 @@ int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base,
 // Returns number of chars written
 // This one corresponds to latexWriteStartChanges(). (Asger)
 int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base,
-                                 LyXFont const & next) const
+                                  LyXFont const & next,
+                                  BufferParams const & bparams) const
 {
        int count = 0;
        bool env = false;
@@ -897,6 +901,8 @@ int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base,
                os << '}';
                ++count;
        }
+       count += switchEncoding(os, bparams, *(language()->encoding()),
+                               *(next.language()->encoding()));
 
        return count;
 }
index b8ac5516557aa9f4d300a1c81f7cd273704513c5..0cfaaf24c5d94dbbcd5427f2a5b60f698dce5032 100644 (file)
@@ -300,14 +300,17 @@ public:
            font state active now.
        */
        int latexWriteStartChanges(odocstream &, LyXFont const & base,
-                                  LyXFont const & prev) const;
+                                  LyXFont const & prev,
+                                  BufferParams const &) const;
 
        /** Writes the tail of the LaTeX needed to change to this font.
            Returns number of chars written. Base is the font state we want
            to achieve.
        */
        int latexWriteEndChanges(odocstream &, LyXFont const & base,
-                                LyXFont const & next) const;
+                                LyXFont const & next,
+                                BufferParams const &) const;
+
 
        /// Build GUI description of font state
        docstring const stateText(BufferParams * params) const;
index f7acbf5dedf96e55c0af5ad8c729d95b3a12d893..59a69511c553562698f785b438b9b3d1c97a0387 100644 (file)
@@ -29,7 +29,6 @@
 #include "insets/insetoptarg.h"
 
 #include "support/lstrings.h"
-#include "support/unicode.h"
 
 
 namespace lyx {
@@ -237,7 +236,7 @@ ParagraphList::const_iterator
 TeXOnePar(Buffer const & buf,
          ParagraphList const & paragraphs,
          ParagraphList::const_iterator pit,
-         odocstream & ucs4, TexRow & texrow,
+         odocstream & os, TexRow & texrow,
          OutputParams const & runparams_in,
          string const & everypar)
 {
@@ -275,49 +274,42 @@ TeXOnePar(Buffer const & buf,
                if (!lyxrc.language_command_end.empty() &&
                    previous_language->babel() != doc_language->babel())
                {
-                       ucs4 << from_ascii(subst(lyxrc.language_command_end,
+                       os << from_ascii(subst(lyxrc.language_command_end,
                                "$$lang",
                                previous_language->babel()))
-                            << '\n';
+                          << '\n';
                        texrow.newline();
                }
 
                if (lyxrc.language_command_end.empty() ||
                    language->babel() != doc_language->babel())
                {
-                       ucs4 << from_ascii(subst(
+                       os << from_ascii(subst(
                                lyxrc.language_command_begin,
                                "$$lang",
                                language->babel()))
-                            << '\n';
+                          << '\n';
                        texrow.newline();
                }
        }
 
-       // FIXME thailatex does not support the inputenc package, so we
-       // ignore switches from/to tis620-0 encoding here. This does of
-       // course only work as long as the non-thai text contains ASCII
-       // only, but it is the best we can do.
-       bool const use_thailatex = (language->encoding()->name() == "tis620-0" ||
-                                   previous_language->encoding()->name() == "tis620-0");
-       if (bparams.inputenc == "auto" &&
-           language->encoding() != previous_language->encoding() &&
-           !use_thailatex) {
-               ucs4 << "\\inputencoding{"
-                    << from_ascii(language->encoding()->latexName())
-                    << "}\n";
+       LyXFont const outerfont =
+               outerFont(std::distance(paragraphs.begin(), pit),
+                         paragraphs);
+       // This must be identical to basefont in Paragraph::simpleTeXOnePar
+       LyXFont basefont = (pit->beginOfBody() > 0) ?
+                       pit->getLabelFont(bparams, outerfont) :
+                       pit->getLayoutFont(bparams, outerfont);
+       Encoding const & outer_encoding(*(outerfont.language()->encoding()));
+       // FIXME we switch from the outer encoding to the encoding of
+       // this paragraph, since I could not figure out the correct
+       // logic to take the encoding of the previous paragraph into
+       // account. This may result in some unneeded encoding changes.
+       if (switchEncoding(os, bparams, outer_encoding,
+                          *(basefont.language()->encoding()))) {
+               os << '\n';
                texrow.newline();
        }
-       // We need to output the paragraph to a temporary stream if we
-       // need to change the encoding. Don't do this if the result does
-       // not go to a file but to the builtin source viewer.
-       odocstringstream par_stream;
-       bool const change_encoding = !runparams_in.dryrun &&
-                       bparams.inputenc == "auto" &&
-                       language->encoding() != doc_language->encoding() &&
-                       !use_thailatex;
-       // don't trigger the copy ctor because it's private on msvc 
-       odocstream & os = *(change_encoding ? &par_stream : &ucs4);
 
        // In an inset with unlimited length (all in one row),
        // don't allow any special options in the paragraph
@@ -376,9 +368,6 @@ TeXOnePar(Buffer const & buf,
 
        // FIXME UNICODE
        os << from_utf8(everypar);
-       LyXFont const outerfont =
-               outerFont(std::distance(paragraphs.begin(), pit),
-                         paragraphs);
        bool need_par = pit->simpleTeXOnePar(buf, bparams, outerfont,
                                             os, texrow, runparams);
 
@@ -478,6 +467,17 @@ TeXOnePar(Buffer const & buf,
                texrow.newline();
        }
 
+       // FIXME we switch from the encoding of this paragraph to the
+       // outer encoding, since I could not figure out the correct logic
+       // to take the encoding of the next paragraph into account.
+       // This may result in some unneeded encoding changes.
+       basefont = pit->getLayoutFont(bparams, outerfont);
+       if (switchEncoding(os, bparams, *(basefont.language()->encoding()),
+                          outer_encoding)) {
+               os << '\n';
+               texrow.newline();
+       }
+
        // we don't need it for the last paragraph!!!
        // Note from JMarc: we will re-add a \n explicitely in
        // TeXEnvironment, because it is needed in this case
@@ -490,59 +490,6 @@ TeXOnePar(Buffer const & buf,
            lyxerr.debugging(Debug::LATEX))
                lyxerr << "TeXOnePar...done " << &*boost::next(pit) << endl;
 
-       if (change_encoding) {
-               lyxerr[Debug::LATEX] << "Converting paragraph to encoding "
-                       << language->encoding()->iconvName() << endl;
-               docstring const par = par_stream.str();
-               // Convert the paragraph to the 8bit encoding that we need to
-               // output.
-               std::vector<char> const encoded = lyx::ucs4_to_eightbit(par.c_str(),
-                       par.size(), language->encoding()->iconvName());
-               // Interpret this as if it was in the 8 bit encoding of the
-               // document language and convert it back to UCS4. That means
-               // that faked does not contain pure UCS4 anymore, but what
-               // will be written to the output file will be correct, because
-               // the real output stream will do a UCS4 -> document language
-               // encoding conversion.
-               // This is of course a hack, but not a bigger one than mixing
-               // two encodings in one file.
-               // FIXME: Catch iconv conversion errors and display an error
-               // dialog.
-
-               // Here follows an explanation how I (gb) came to the current
-               // solution:
-
-               // codecvt facets are only used by file streams -> OK, maybe
-               // we could use file streams and not generic streams in the
-               // latex() methods? No, that does not  work, we use them at
-               // several places to write to string streams.
-               // Next try: Maybe we could do something else than codecvt
-               // in our streams, and  add a setEncoding() method? That
-               // does not work unless we rebuild the functionality of file
-               // and string streams, since both odocfstream and
-               // odocstringstream inherit from std::basic_ostream<docstring>
-               // and we can  neither add a method to that class nor change
-               // the inheritance of the file and string streams.
-
-               // What might be possible is to encapsulate the real file and
-               // string streams in our own version, and use a homemade
-               // streambuf that would do the encoding conversion and then
-               // forward to the real stream. That would probably work, but
-               // would require far more code and a good understanding of
-               // stream buffers to get it right.
-
-               // Another idea by JMarc is to use a modifier like
-               // os << setencoding("iso-8859-1");
-               // That currently looks like the best idea.
-
-               std::vector<char_type> const faked = lyx::eightbit_to_ucs4(&(encoded[0]),
-                       encoded.size(), doc_language->encoding()->iconvName());
-               std::vector<char_type>::const_iterator const end = faked.end();
-               std::vector<char_type>::const_iterator it = faked.begin();
-               for (; it != end; ++it)
-                       ucs4.put(*it);
-       }
-
        return ++pit;
 }
 
@@ -646,4 +593,24 @@ void latexParagraphs(Buffer const & buf,
 }
 
 
+int switchEncoding(odocstream & os, BufferParams const & bparams,
+                   Encoding const & oldEnc, Encoding const & newEnc)
+{
+       // FIXME thailatex does not support the inputenc package, so we
+       // ignore switches from/to tis620-0 encoding here. This does of
+       // course only work as long as the non-thai text contains ASCII
+       // only, but it is the best we can do.
+       if (bparams.inputenc == "auto" && oldEnc.name() != newEnc.name() &&
+           oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0") {
+               lyxerr[Debug::LATEX] << "Changing LaTeX encoding from "
+                                    << oldEnc.name() << " to "
+                                    << newEnc.name() << endl;
+               os << setEncoding(newEnc.iconvName());
+               docstring const inputenc(from_ascii(newEnc.latexName()));
+               os << "\\inputencoding{" << inputenc << '}';
+               return 16 + inputenc.length();
+       }
+       return 0;
+}
+
 } // namespace lyx
index 185b737d2096d1cda88608e67c46a5d621ad723c..5544a6335617462bde0ddc82cccaf8976e7ebb6a 100644 (file)
 namespace lyx {
 
 class Buffer;
+class BufferParams;
+class Encoding;
 class OutputParams;
 class TexRow;
 
-/// Just a wrapper for the method below, first creating the ofstream.
-
+/** Export \p paragraphs of buffer \p buf to LaTeX.
+    Don't use a temporary stringstream for \p os if the final output is
+    supposed to go to a file.
+    \sa Buffer::writeLaTeXSource for the reason.
+ */
 void latexParagraphs(Buffer const & buf,
                     ParagraphList const & paragraphs,
                     odocstream & ofs,
@@ -32,6 +37,10 @@ void latexParagraphs(Buffer const & buf,
                     OutputParams const &,
                     std::string const & everypar = std::string());
 
+/// Switch the encoding of \p os from \p oldEnc to \p newEnc if needed.
+/// \return the number of characters written to \p os.
+int switchEncoding(odocstream & os, BufferParams const & bparams,
+                   Encoding const & oldEnc, Encoding const & newEnc);
 
 } // namespace lyx
 
index 7a9a79e5e51e82ad4f0fa9941e9a5f441893e831..9f5083961feb8582f1445d801c0cc521188da0d7 100644 (file)
@@ -761,7 +761,7 @@ int adjust_column_count(string const & str, int oldcol)
 
 // This could go to ParagraphParameters if we want to
 int Paragraph::startTeXParParams(BufferParams const & bparams,
-                                odocstream & os, bool moving_arg) const
+                                 odocstream & os, bool moving_arg) const
 {
        int column = 0;
 
@@ -825,7 +825,7 @@ int Paragraph::startTeXParParams(BufferParams const & bparams,
 
 // This could go to ParagraphParameters if we want to
 int Paragraph::endTeXParParams(BufferParams const & bparams,
-                              odocstream & os, bool moving_arg) const
+                               odocstream & os, bool moving_arg) const
 {
        int column = 0;
 
@@ -913,6 +913,7 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
        // As long as we are in the label, this font is the base font of the
        // label. Before the first body character it is set to the base font
        // of the body.
+       // This must be identical to basefont in TeXOnePar().
        LyXFont basefont;
 
        LaTeXFeatures features(buf, bparams, runparams);
@@ -964,7 +965,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
                if (i == body_pos) {
                        if (body_pos > 0) {
                                if (open_font) {
-                                       column += running_font.latexWriteEndChanges(os, basefont, basefont);
+                                       column += running_font.latexWriteEndChanges(
+                                               os, basefont, basefont, bparams);
                                        open_font = false;
                                }
                                basefont = getLayoutFont(bparams, outerfont);
@@ -1004,9 +1006,10 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
                    (font != running_font ||
                     font.language() != running_font.language()))
                {
-                       column += running_font.latexWriteEndChanges(os,
-                                                                   basefont,
-                                                                   (i == body_pos-1) ? basefont : font);
+                       column += running_font.latexWriteEndChanges(
+                                       os, basefont,
+                                       (i == body_pos-1) ? basefont : font,
+                                       bparams);
                        running_font = basefont;
                        open_font = false;
                }
@@ -1025,8 +1028,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
                     font.language() != running_font.language()) &&
                        i != body_pos - 1)
                {
-                       column += font.latexWriteStartChanges(os, basefont,
-                                                             last_font);
+                       column += font.latexWriteStartChanges(
+                                       os, basefont, last_font, bparams);
                        running_font = font;
                        open_font = true;
                }
@@ -1062,11 +1065,11 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
                if (next_) {
                        running_font
                                .latexWriteEndChanges(os, basefont,
-                                                     next_->getFont(bparams,
-                                                     0, outerfont));
+                                       next_->getFont(bparams, 0, outerfont),
+                                       bparams);
                } else {
                        running_font.latexWriteEndChanges(os, basefont,
-                                                         basefont);
+                                                         basefont, bparams);
                }
 #else
 #ifdef WITH_WARNINGS
@@ -1074,7 +1077,8 @@ bool Paragraph::simpleTeXOnePar(Buffer const & buf,
 //#warning there as we start another \selectlanguage with the next paragraph if
 //#warning we are in need of this. This should be fixed sometime (Jug)
 #endif
-               running_font.latexWriteEndChanges(os, basefont,  basefont);
+               running_font.latexWriteEndChanges(os, basefont, basefont,
+                                                 bparams);
 #endif
        }
 
index 86028d31aa3f7322205876e624513a95281c3617..1a8cbfef35ce704358d6cb1a6e7b335ede563f08 100644 (file)
@@ -483,7 +483,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
                                os << '\n';
                        } else {
                                if (open_font) {
-                                       column += running_font.latexWriteEndChanges(os, basefont, basefont);
+                                       column += running_font.latexWriteEndChanges(
+                                               os, basefont, basefont, bparams);
                                        open_font = false;
                                }
                                basefont = owner_->getLayoutFont(bparams, outerfont);
@@ -536,10 +537,8 @@ void Paragraph::Pimpl::simpleTeXSpecialChars(Buffer const & buf,
 #endif
                // some insets cannot be inside a font change command
                if (open_font && inset->noFontChange()) {
-                       column +=running_font.
-                               latexWriteEndChanges(os,
-                                                    basefont,
-                                                    basefont);
+                       column += running_font.latexWriteEndChanges(
+                                       os, basefont, basefont, bparams);
                        open_font = false;
                        basefont = owner_->getLayoutFont(bparams, outerfont);
                        running_font = basefont;
index 9f0b70187c5a747dbdaa31a3705583b502d43a0a..ac9a8ed23050ac38e91e9c28787dda125151e798 100644 (file)
@@ -294,6 +294,32 @@ odocfstream::odocfstream(const char* s, std::ios_base::openmode mode,
        open(s, mode);
 }
 
+
+SetEnc setEncoding(string const & encoding)
+{
+       return SetEnc(encoding);
+}
+
+
+odocstream & operator<<(odocstream & os, SetEnc e)
+{
+       if (std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc())) {
+               // This stream must be a file stream, since we never imbue
+               // any other stream with a locale having a iconv_codecvt_facet.
+               // Flush the stream so that all pending output is written
+               // with the old encoding.
+               os.flush();
+               std::locale locale(os.rdbuf()->getloc(),
+                       new iconv_codecvt_facet(e.encoding, std::ios_base::out));
+               // FIXME Does changing the codecvt facet of an open file
+               // stream always work? It does with gcc 4.1, but I have read
+               // somewhere that it does not with MSVC.
+               // What does the standard say?
+               os.imbue(locale);
+       }
+       return os;
+}
+
 }
 
 #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)
index 1783fd4271b92fcde6db53dc56ee74ab2e12bbd3..c3db04c58a3a2d2083c55160f6fccc01ee3bdaad 100644 (file)
@@ -77,6 +77,25 @@ odocstream & operator<<(odocstream & os, char c)
     return os;
 }
 
+/// Helper struct for changing stream encoding
+struct SetEnc {
+       SetEnc(std::string const & e) : encoding(e) {}
+       std::string encoding;
+};
+
+/// Helper function for changing stream encoding
+SetEnc setEncoding(std::string const & encoding);
+
+/** Change the encoding of \p os to \p e.encoding.
+    \p e.encoding must be a valid iconv name of an 8bit encoding.
+    This does nothing if the stream is not a file stream, since only
+    file streams do have an associated 8bit encoding.
+    Usage: \code
+    os << setEncoding("ISO-8859-1");
+    \endcode
+ */
+odocstream & operator<<(odocstream & os, SetEnc e);
+
 }
 
 #endif