From 0ddb4d5f30d01684550eee93303b152662b0da79 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sat, 13 Jan 2007 14:36:54 +0000 Subject: [PATCH] Change lyx2lyx conversion and LaTeX export of documents with \inputencoding default * src/paragraph_pimpl.C (isEncoding): Explain why bparams.inputenc == "default" is ignored * src/bufferparams.C (BufferParams::encoding): Determine the encoding from the language for inputenc == "default" * src/buffer.h (writeLaTeXSource): Mention inputenc == "default" in documentation * src/bufferparams.h (inputenc): Update documentation of "default" * src/output_latex.C (switchEncoding): Switch the encoding also for inputenc == "default", but don't output \inputencoding commands in that case * lib/lyx2lyx/LyX.py (get_encoding): Determine the encoding from the language for inputencoding == "default" * lib/lyx2lyx/lyx_1_5.py (convert_multiencoding): ditto * development/FORMAT: Update documentation of \inputencoding default git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16667 a592a061-630c-0410-9148-cb99ea01b6c8 --- development/FORMAT | 9 ++++++--- lib/lyx2lyx/LyX.py | 4 ++-- lib/lyx2lyx/lyx_1_5.py | 11 ++++++----- src/buffer.h | 11 ++++++----- src/bufferparams.C | 14 ++++---------- src/bufferparams.h | 15 +++++++++------ src/output_latex.C | 11 +++++++---- src/paragraph_pimpl.C | 4 ++++ 8 files changed, 44 insertions(+), 35 deletions(-) diff --git a/development/FORMAT b/development/FORMAT index 75b69a8adc..8a54eea112 100644 --- a/development/FORMAT +++ b/development/FORMAT @@ -78,11 +78,14 @@ LyX file-format changes encoding of the LyX file: \inputencoding LyX file encoding - auto as determined by the document language(s) - default unspecified 8bit (treated as latin1 internally, - see comment in bufferparams.h) + auto as determined by the document and character + languages + default ditto everything else as determined by \inputencoding + The difference between auto and default is only the LaTeX output: + auto causes loading of the inputenc package, default does not. + 2006-07-03 Georg Baum * format incremented to 248: Basic booktabs support diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py index a21223dac0..b0bdbdbee8 100644 --- a/lib/lyx2lyx/LyX.py +++ b/lib/lyx2lyx/LyX.py @@ -112,9 +112,9 @@ def get_encoding(language, inputencoding, format): if format > 248: return "utf8" from lyx2lyx_lang import lang - if inputencoding == "auto": + if inputencoding == "auto" or inputencoding == "default": return lang[language][3] - if inputencoding == "default" or inputencoding == "": + if inputencoding == "": return "latin1" # python does not know the alias latin9 if inputencoding == "latin9": diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py index dcd600d00d..8918fb386f 100644 --- a/lib/lyx2lyx/lyx_1_5.py +++ b/lib/lyx2lyx/lyx_1_5.py @@ -219,10 +219,11 @@ def revert_booktabs(document): def convert_multiencoding(document, forward): """ Fix files with multiple encodings. -Files with an inputencoding of "auto" and multiple languages where at least -two languages have different default encodings are encoded in multiple -encodings for file formats < 249. These files are incorrectly read and -written (as if the whole file was in the encoding of the main language). +Files with an inputencoding of "auto" or "default" and multiple languages +where at least two languages have different default encodings are encoded +in multiple encodings for file formats < 249. These files are incorrectly +read and written (as if the whole file was in the encoding of the main +language). This function - converts from fake unicode values to true unicode if forward is true, and @@ -234,7 +235,7 @@ necessary parsing in modern formats than in ancient ones. """ encoding_stack = [document.encoding] lang_re = re.compile(r"^\\lang\s(\S+)") - if document.inputencoding == "auto": + if document.inputencoding == "auto" or document.inputencoding == "default": for i in range(len(document.body)): result = lang_re.match(document.body[i]) if result: diff --git a/src/buffer.h b/src/buffer.h index 98d7669eae..66f5f7901d 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -153,11 +153,12 @@ public: bool output_preamble = true, bool output_body = true); /** Export the buffer to LaTeX. - If \p os is a file stream, and params().inputenc == "auto", and - the buffer contains text in different languages with more than - one encoding, then this method will change the encoding - associated to \p os. Therefore you must not call this method with - a string stream if the output is supposed to go to a file. \code + If \p os is a file stream, and params().inputenc is "auto" or + "default", and the buffer contains text in different languages + with more than one encoding, then this method will change the + encoding associated to \p os. Therefore you must not call this + method with a string stream if the output is supposed to go to a + file. \code odocfstream ofs; ofs.open("test.tex"); writeLaTeXSource(ofs, ...); diff --git a/src/bufferparams.C b/src/bufferparams.C index 959d087b79..dff424a9af 100644 --- a/src/bufferparams.C +++ b/src/bufferparams.C @@ -1466,20 +1466,14 @@ string const BufferParams::loadFonts(LaTeXFeatures & features, string const & rm Encoding const & BufferParams::encoding() const { - if (inputenc == "auto") + if (inputenc == "auto" || inputenc == "default") return *(language->encoding()); - Encoding const * const enc = (inputenc == "default") ? - encodings.getFromLyXName("iso8859-1") : + Encoding const * const enc = encodings.getFromLaTeXName(inputenc); if (enc) return *enc; - if (inputenc == "default") - lyxerr << "Could not find iso8859-1 encoding for inputenc " - "value `default'. Using inputenc `auto' instead." - << endl; - else - lyxerr << "Unknown inputenc value `" << inputenc - << "'. Using `auto' instead." << endl; + lyxerr << "Unknown inputenc value `" << inputenc + << "'. Using `auto' instead." << endl; return *(language->encoding()); } diff --git a/src/bufferparams.h b/src/bufferparams.h index 28599a93ac..99d4007765 100644 --- a/src/bufferparams.h +++ b/src/bufferparams.h @@ -178,15 +178,18 @@ public: BranchList const & branchlist() const; /** * The input encoding for LaTeX. This can be one of - * - auto: find out the input encoding from the used languages - * - default: Don't load the inputenc package and hope that it will - * work (unlikely). The encoding is an unspecified 8bit encoding, - * the interpretation is up to the LaTeX compiler. Because we need - * a rule how to create this from our internal UCS4 encoded - * document contents we treat this as latin1 internally. + * - \c auto: find out the input encoding from the used languages + * - \c default: ditto * - any encoding supported by the inputenc package * The encoding of the LyX file is always utf8 and has nothing to * do with this setting. + * The difference between \c auto and \c default is that \c auto also + * causes loading of the inputenc package, while \c default does not. + * \c default will not work unless the user takes additional measures + * (such as using special environments like the CJK environment from + * CJK.sty). + * \c default can be seen as an unspecified 8bit encoding, since LyX + * does not interpret it in any way apart from display on screen. */ std::string inputenc; /// The main encoding used by this buffer for LaTeX output. diff --git a/src/output_latex.C b/src/output_latex.C index 59a69511c5..383da47b2b 100644 --- a/src/output_latex.C +++ b/src/output_latex.C @@ -600,15 +600,18 @@ int switchEncoding(odocstream & os, BufferParams const & bparams, // ignore switches from/to tis620-0 encoding here. This does of // course only work as long as the non-thai text contains ASCII // only, but it is the best we can do. - if (bparams.inputenc == "auto" && oldEnc.name() != newEnc.name() && + if ((bparams.inputenc == "auto" || bparams.inputenc == "default") && + oldEnc.name() != newEnc.name() && oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0") { lyxerr[Debug::LATEX] << "Changing LaTeX encoding from " << oldEnc.name() << " to " << newEnc.name() << endl; os << setEncoding(newEnc.iconvName()); - docstring const inputenc(from_ascii(newEnc.latexName())); - os << "\\inputencoding{" << inputenc << '}'; - return 16 + inputenc.length(); + if (bparams.inputenc != "default") { + docstring const inputenc(from_ascii(newEnc.latexName())); + os << "\\inputencoding{" << inputenc << '}'; + return 16 + inputenc.length(); + } } return 0; } diff --git a/src/paragraph_pimpl.C b/src/paragraph_pimpl.C index 1a8cbfef35..0a3b64715b 100644 --- a/src/paragraph_pimpl.C +++ b/src/paragraph_pimpl.C @@ -62,6 +62,10 @@ size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase); bool isEncoding(BufferParams const & bparams, LyXFont const & font, string const & encoding) { + // We do ignore bparams.inputenc == "default" here because characters + // in this encoding could be treated by TeX as something different, + // e.g. if they are inside a CJK environment. See also + // http://bugzilla.lyx.org/show_bug.cgi?id=3043. return (bparams.inputenc == encoding || (bparams.inputenc == "auto" && font.language()->encoding()->latexName() == encoding)); -- 2.39.2