git.lyx.org Git - features.git/commitdiff
Sanitize CJK support (fix bug 3561, 4349 and 4337):
author: Jürgen Spitzmüller <spitz@lyx.org>
Sat, 8 Dec 2007 11:21:00 +0000 (11:21 +0000)
committer: Jürgen Spitzmüller <spitz@lyx.org>
Sat, 8 Dec 2007 11:21:00 +0000 (11:21 +0000)
* src/output_latex.{cpp, h}:
- Many changes in order to allow CJK in a multilingual context.
* src/Paragraph.cpp:
- Fix file encoding switch and language nesting with CJK.
* src/Buffer.cpp:
- Move the opening and closing CJK and language tags to output_latex.cpp.
* src/Font.cpp:
- switchEncoding call now takes runparams as argument.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@22011 a592a061-630c-0410-9148-cb99ea01b6c8

src/Buffer.cpp
src/Font.cpp
src/Paragraph.cpp
src/output_latex.cpp
src/output_latex.h

index f888232ddf3fbcdb50bfeb1d76828782ea102f51..bf0a5dced83357661745881c05e37d530e053a6f 100644 (file)
@@ -1129,26 +1129,6 @@ void Buffer::writeLaTeXSource(odocstream & os,
        
        LYXERR(Debug::INFO, "preamble finished, now the body.");
 
-       if (!lyxrc.language_auto_begin &&
-           !params().language->babel().empty()) {
-               // FIXME UNICODE
-               os << from_utf8(subst(lyxrc.language_command_begin,
-                                          "$$lang",
-                                          params().language->babel()))
-                  << '\n';
-               d->texrow.newline();
-       }
-
-       Encoding const & encoding = params().encoding();
-       if (encoding.package() == Encoding::CJK) {
-               // Open a CJK environment, since in contrast to the encodings
-               // handled by inputenc the document encoding is not set in
-               // the preamble if it is handled by CJK.sty.
-               os << "\\begin{CJK}{" << from_ascii(encoding.latexName())
-                  << "}{}\n";
-               d->texrow.newline();
-       }
-
        // if we are doing a real file with body, even if this is the
        // child of some other buffer, let's cut the link here.
        // This happens for example if only a child document is printed.
@@ -1171,23 +1151,6 @@ void Buffer::writeLaTeXSource(odocstream & os,
        os << endl;
        d->texrow.newline();
 
-       if (encoding.package() == Encoding::CJK) {
-               // Close the open CJK environment.
-               // latexParagraphs will have opened one even if the last text
-               // was not CJK.
-               os << "\\end{CJK}\n";
-               d->texrow.newline();
-       }
-
-       if (!lyxrc.language_auto_end &&
-           !params().language->babel().empty()) {
-               os << from_utf8(subst(lyxrc.language_command_end,
-                                          "$$lang",
-                                          params().language->babel()))
-                  << '\n';
-               d->texrow.newline();
-       }
-
        if (output_preamble) {
                os << "\\end{document}\n";
                d->texrow.newline();
index ce59fd231b54af77fd375cd7defcae6cad2978c9..7821c58e85685f79818793b7da8832135c302c3f 100644 (file)
@@ -464,7 +464,7 @@ int Font::latexWriteStartChanges(odocstream & os, BufferParams const & bparams,
 
        if (language()->encoding()->package() == Encoding::CJK) {
                pair<bool, int> const c = switchEncoding(os, bparams,
-                               runparams.moving_arg, *(runparams.encoding),
+                               runparams, *(runparams.encoding),
                                *(language()->encoding()));
                if (c.first) {
                        open_encoding_ = true;
@@ -635,7 +635,7 @@ int Font::latexWriteEndChanges(odocstream & os, BufferParams const & bparams,
                // to do correct environment nesting
                Encoding const * const ascii = encodings.getFromLyXName("ascii");
                pair<bool, int> const c = switchEncoding(os, bparams,
-                               runparams.moving_arg, *(runparams.encoding),
+                               runparams, *(runparams.encoding),
                                *ascii);
                BOOST_ASSERT(c.first);
                count += c.second;
index b28082f83e4de5e6071756de9e9f389ec78d8f18..42c4bec1ccd3a879f2c560640b5b61c68e58a327 100644 (file)
@@ -67,6 +67,7 @@ namespace lyx {
 using support::contains;
 using support::lowercase;
 using support::prefixIs;
+using support::subst;
 using support::suffixIs;
 using support::rsplit;
 using support::uppercase;
@@ -1962,12 +1963,22 @@ bool Paragraph::latex(Buffer const & buf,
                        open_font = false;
                }
 
+               // close babel's font environment before opening CJK.
+               if (!running_font.language()->babel().empty() &&
+                   font.language()->encoding()->package() == Encoding::CJK) {
+                               string end_tag = subst(lyxrc.language_command_end,
+                                                       "$$lang",
+                                                       running_font.language()->babel());
+                               os << from_ascii(end_tag);
+                               column += end_tag.length();
+               }
+
                // Switch file encoding if necessary (and allowed)
                if (!runparams.verbatim && 
-                   runparams.encoding->package() == Encoding::inputenc &&
-                   font.language()->encoding()->package() == Encoding::inputenc) {
+                   runparams.encoding->package() == Encoding::none &&
+                   font.language()->encoding()->package() == Encoding::none) {
                        std::pair<bool, int> const enc_switch = switchEncoding(os, bparams,
-                                       runparams.moving_arg, *(runparams.encoding),
+                                       runparams, *(runparams.encoding),
                                        *(font.language()->encoding()));
                        if (enc_switch.first) {
                                column += enc_switch.second;
index 47ae3f8be74a2dd9fcf3925bac50b581cbd7460e..e953d91d9de82e7cdc7426eab72ec4f8864e6c54 100644 (file)
@@ -47,6 +47,17 @@ using std::make_pair;
 
 namespace {
 
+
+enum OpenEncoding {
+               none,
+               inputenc,
+               CJK
+       };
+
+static int open_encoding_ = none;
+static bool cjk_inherited_ = false;
+
+
 ParagraphList::const_iterator
 TeXEnvironment(Buffer const & buf,
               ParagraphList const & paragraphs,
@@ -165,6 +176,18 @@ TeXEnvironment(Buffer const & buf,
                        os << from_ascii(style->latexparam()) << '\n';
                texrow.newline();
        }
+
+       // in multilingual environments, the CJK tags have to be nested properly
+       bool cjk_nested = false;
+       if (par_language->encoding()->package() == Encoding::CJK &&
+           open_encoding_ != CJK && pit->isMultiLingual(bparams)) {
+               os << "\\begin{CJK}{" << from_ascii(par_language->encoding()->latexName())
+                  << "}{}%\n";
+               open_encoding_ = CJK;
+               cjk_nested = true;
+               texrow.newline();
+       }
+
        ParagraphList::const_iterator par = pit;
        do {
                par = TeXOnePar(buf, paragraphs, par, os, texrow, runparams);
@@ -203,6 +226,14 @@ TeXEnvironment(Buffer const & buf,
                 && par->params().depth() == pit->params().depth()
                 && par->params().leftIndent() == pit->params().leftIndent());
 
+       if (open_encoding_ == CJK && cjk_nested) {
+               // We need to close the encoding even if it does not change
+               // to do correct environment nesting
+               os << "\\end{CJK}\n";
+               texrow.newline();
+               open_encoding_ = none;
+       }
+
        if (style->isEnvironment()) {
                os << "\\end{" << from_ascii(style->latexname()) << "}\n";
                texrow.newline();
@@ -282,6 +313,32 @@ TeXOnePar(Buffer const & buf,
        OutputParams runparams = runparams_in;
        runparams.moving_arg |= style->needprotect;
 
+       // we are at the beginning of an inset and CJK is already open.
+       if (pit == paragraphs.begin() && runparams.local_font != 0 &&
+           open_encoding_ == CJK) {
+               cjk_inherited_ = true;
+               open_encoding_ = none;
+       }
+
+       if (pit == paragraphs.begin() && runparams.local_font == 0) {
+               // Open a CJK environment at the beginning of the main buffer
+               // if the document's language is a CJK language
+               if (bparams.encoding().package() == Encoding::CJK) {
+                       os << "\\begin{CJK}{" << from_ascii(bparams.encoding().latexName())
+                       << "}{}%\n";
+                       texrow.newline();
+                       open_encoding_ = CJK;
+               }
+               if (!lyxrc.language_auto_begin && !bparams.language->babel().empty()) {
+                       // FIXME UNICODE
+                       os << from_utf8(subst(lyxrc.language_command_begin,
+                                            "$$lang",
+                                            bparams.language->babel()))
+                          << '\n';
+               texrow.newline();
+               }
+       }
+
        // This paragraph's language
        Language const * const par_language = pit->getParLanguage(bparams);
        // The document's language
@@ -363,13 +420,16 @@ TeXOnePar(Buffer const & buf,
                                else
                                        os << "\\L{";
                        }
-                       os << from_ascii(subst(
-                               lyxrc.language_command_begin,
-                               "$$lang",
-                               par_language->babel()))
-                          // the '%' is necessary to prevent unwanted whitespace
-                          << "%\n";
-                       texrow.newline();
+                       // With CJK, the CJK tag has to be closed first (see below)
+                       if (runparams.encoding->package() != Encoding::CJK) {
+                               os << from_ascii(subst(
+                                       lyxrc.language_command_begin,
+                                       "$$lang",
+                                       par_language->babel()))
+                                  // the '%' is necessary to prevent unwanted whitespace
+                                  << "%\n";
+                               texrow.newline();
+                       }
                }
        }
 
@@ -377,14 +437,14 @@ TeXOnePar(Buffer const & buf,
        // encoding, since this only affects the position of the outputted
        // \inputencoding command; the encoding switch will occur when necessary
        if (bparams.inputenc == "auto" &&
-           runparams.encoding->package() == Encoding::inputenc) {
+           runparams.encoding->package() != Encoding::none) {
                // Look ahead for future encoding changes.
                // We try to output them at the beginning of the paragraph,
                // since the \inputencoding command is not allowed e.g. in
                // sections.
                for (pos_type i = 0; i < pit->size(); ++i) {
                        char_type const c = pit->getChar(i);
-                       if (c < 0x80)
+                       if (runparams.encoding->package() == Encoding::inputenc && c < 0x80)
                                continue;
                        if (pit->isInset(i))
                                break;
@@ -393,17 +453,44 @@ TeXOnePar(Buffer const & buf,
                        // encoding to that required by the language of c.
                        Encoding const * const encoding =
                                pit->getFontSettings(bparams, i).language()->encoding();
-                       pair<bool, int> enc_switch = switchEncoding(os, bparams, false,
+
+                       // with CJK, only add switch if we have CJK content at the beginning
+                       // of the paragraph
+                       if (encoding->package() != Encoding::CJK || i == 0) {
+                               OutputParams tmp_rp = runparams;
+                               runparams.moving_arg = false;
+                               pair<bool, int> enc_switch = switchEncoding(os, bparams, runparams,
                                        *(runparams.encoding), *encoding);
-                       if (encoding->package() == Encoding::inputenc && enc_switch.first) {
-                               runparams.encoding = encoding;
-                               if (enc_switch.second > 0) {
-                                       // the '%' is necessary to prevent unwanted whitespace
-                                       os << "%\n";
+                               runparams = tmp_rp;
+                               // the following is necessary after a CJK environment in a multilingual
+                               // context (nesting issue).
+                               if (par_language->encoding()->package() == Encoding::CJK &&
+                                   open_encoding_ != CJK && !cjk_inherited_) {
+                                       os << "\\begin{CJK}{" << from_ascii(par_language->encoding()->latexName())
+                                          << "}{}%\n";
+                                       open_encoding_ = CJK;
                                        texrow.newline();
                                }
+                               if (encoding->package() != Encoding::none && enc_switch.first) {
+                                       if (enc_switch.second > 0) {
+                                               // the '%' is necessary to prevent unwanted whitespace
+                                               os << "%\n";
+                                               texrow.newline();
+                                       }
+                                       // With CJK, the CJK tag had to be closed first (see above)
+                                       if (runparams.encoding->package() == Encoding::CJK) {
+                                               os << from_ascii(subst(
+                                                       lyxrc.language_command_begin,
+                                                       "$$lang",
+                                                       par_language->babel()))
+                                               // the '%' is necessary to prevent unwanted whitespace
+                                               << "%\n";
+                                               texrow.newline();
+                                       }
+                                       runparams.encoding = encoding;
+                               }
+                               break;
                        }
-                       break;
                }
        }
 
@@ -561,20 +648,23 @@ TeXOnePar(Buffer const & buf,
                        os << '\n';
                        texrow.newline();
                }
-               if (lyxrc.language_command_end.empty()) {
-                       if (!prev_language->babel().empty()) {
+               // when the paragraph uses CJK, the language has to be closed earlier
+               if (font.language()->encoding()->package() != Encoding::CJK) {
+                       if (lyxrc.language_command_end.empty()) {
+                               if (!prev_language->babel().empty()) {
+                                       os << from_ascii(subst(
+                                               lyxrc.language_command_begin,
+                                               "$$lang",
+                                               prev_language->babel()));
+                                       pending_newline = true;
+                               }
+                       } else if (!par_language->babel().empty()) {
                                os << from_ascii(subst(
-                                       lyxrc.language_command_begin,
+                                       lyxrc.language_command_end,
                                        "$$lang",
-                                       prev_language->babel()));
+                                       par_language->babel()));
                                pending_newline = true;
                        }
-               } else if (!par_language->babel().empty()) {
-                       os << from_ascii(subst(
-                               lyxrc.language_command_end,
-                               "$$lang",
-                               par_language->babel()));
-                       pending_newline = true;
                }
        }
        if (closing_rtl_ltr_environment)
@@ -585,6 +675,56 @@ TeXOnePar(Buffer const & buf,
                texrow.newline();
        }
 
+       // if this is a CJK-paragraph and the next isn't, close CJK
+       // also if the next paragraph is a multilingual environment (because of nesting)
+       if (boost::next(pit) != paragraphs.end() && open_encoding_ == CJK &&
+           (boost::next(pit)->getParLanguage(bparams)->encoding()->package() != Encoding::CJK ||
+            boost::next(pit)->layout()->isEnvironment() && boost::next(pit)->isMultiLingual(bparams))
+            // in environments, CJK has to be closed later (nesting!)
+            && !style->isEnvironment()) {
+               os << "\\end{CJK}\n";
+               open_encoding_ = none;
+       }
+
+       // If this is the last paragraph, close the CJK environment
+       // if necessary. If it's an environment, we'll have to \end that first.
+       if (boost::next(pit) == paragraphs.end() && !style->isEnvironment()) {
+               switch (open_encoding_) {
+                       case CJK: {
+                               // end of main text
+                               if (runparams.local_font == 0) {
+                                       os << '\n';
+                                       texrow.newline();
+                                       os << "\\end{CJK}\n";
+                                       texrow.newline();
+                               // end of an inset
+                               } else
+                                       os << "\\end{CJK}";
+                               open_encoding_ = none;
+                               break;
+                       }
+                       case inputenc: {
+                               os << "\\egroup";
+                               open_encoding_ = none;
+                               break;
+                       }
+                       case none:
+                       default:
+                               // do nothing
+                               break;
+               }
+               // auto_end tag only if the last par is in a babel language
+               if (runparams.local_font == 0 && !lyxrc.language_auto_end && 
+                   !bparams.language->babel().empty() &&
+                   font.language()->encoding()->package() != Encoding::CJK) {
+                       os << from_utf8(subst(lyxrc.language_command_end,
+                                             "$$lang",
+                                             bparams.language->babel()))
+                          << '\n';
+                       texrow.newline();
+               }
+       }
+
        // If this is the last paragraph, and a local_font was set upon entering
        // the inset, the encoding should be set back to that local_font's 
        // encoding. We don't use switchEncoding(), because no explicit encoding
@@ -714,13 +854,26 @@ void latexParagraphs(Buffer const & buf,
                                }
                texrow.newline();
        }
+       // If the last paragraph is an environment, we'll have to close
+       // CJK at the very end to do proper nesting.
+       if (open_encoding_ == CJK) {
+               os << "\\end{CJK}\n";
+               texrow.newline();
+               open_encoding_ = none;
+       }
+       // reset inherited encoding
+       if (cjk_inherited_) {
+               open_encoding_ = CJK;
+               cjk_inherited_ = false;
+       }
 }
 
 
 pair<bool, int> switchEncoding(odocstream & os, BufferParams const & bparams,
-                  bool moving_arg, Encoding const & oldEnc,
+                  OutputParams const & runparams, Encoding const & oldEnc,
                   Encoding const & newEnc)
 {
+       bool moving_arg = runparams.moving_arg;
        if ((bparams.inputenc != "auto" && bparams.inputenc != "default")
                || moving_arg)
                return make_pair(false, 0);
@@ -744,32 +897,55 @@ pair<bool, int> switchEncoding(odocstream & os, BufferParams const & bparams,
        if (bparams.inputenc == "default")
                return make_pair(true, 0);
 
-       docstring const inputenc = from_ascii(newEnc.latexName());
+       docstring const inputenc_arg(from_ascii(newEnc.latexName()));
        switch (newEnc.package()) {
                case Encoding::none:
                        // shouldn't ever reach here, see above
                        return make_pair(true, 0);
                case Encoding::inputenc: {
-                       int count = inputenc.length();
-                       if (oldEnc.package() == Encoding::CJK) {
+                       int count = inputenc_arg.length();
+                       if (oldEnc.package() == Encoding::CJK &&
+                           open_encoding_ == CJK) {
                                os << "\\end{CJK}";
+                               open_encoding_ = none;
                                count += 9;
                        }
-                       os << "\\inputencoding{" << inputenc << '}';
+                       else if (oldEnc.package() == Encoding::inputenc &&
+                                open_encoding_ == inputenc) {
+                               os << "\\egroup";
+                               open_encoding_ = none;
+                               count += 7;
+                       }
+                       if (runparams.local_font != 0 && oldEnc.package() == Encoding::CJK) {
+                               // within insets, \inputenc switches need to be 
+                               // embraced within \bgroup ... \egroup; else CJK fails.
+                               os << "\\bgroup";
+                               count += 7;
+                               open_encoding_ = inputenc;
+                       }
+                       os << "\\inputencoding{" << inputenc_arg << '}';
                        return make_pair(true, count + 16);
                }
                case Encoding::CJK: {
-                       int count = inputenc.length();
-                       if (oldEnc.package() == Encoding::CJK) {
+                       int count = inputenc_arg.length();
+                       if (oldEnc.package() == Encoding::CJK && 
+                           open_encoding_ == CJK) {
                                os << "\\end{CJK}";
                                count += 9;
                        }
-                       os << "\\begin{CJK}{" << inputenc << "}{}";
+                       if (oldEnc.package() == Encoding::inputenc && 
+                           open_encoding_ == inputenc) {
+                               os << "\\egroup";
+                               count += 7;
+                       }
+                       os << "\\begin{CJK}{" << inputenc_arg << "}{}";
+                       open_encoding_ = CJK;
                        return make_pair(true, count + 15);
                }
        }
        // Dead code to avoid a warning:
        return make_pair(true, 0);
+
 }
 
 } // namespace lyx
index 008564ed7942aede56fc44e352ad99be6d46bb64..db7f4ebb2d1e60550e511b0f6a8bd948023fc312 100644 (file)
@@ -48,7 +48,7 @@ void latexParagraphs(Buffer const & buf,
 /// \return (did the encoding change?, number of characters written to \p os)
 std::pair<bool, int> switchEncoding(odocstream & os, 
                     BufferParams const & bparams,
-                    bool moving_arg, Encoding const & oldEnc,
+                    OutputParams const &, Encoding const & oldEnc,
                     Encoding const & newEnc);
 
 } // namespace lyx