From 0ddb4d5f30d01684550eee93303b152662b0da79 Mon Sep 17 00:00:00 2001
From: Georg Baum <Georg.Baum@post.rwth-aachen.de>
Date: Sat, 13 Jan 2007 14:36:54 +0000
Subject: [PATCH] Change lyx2lyx conversion and LaTeX export of documents with
 \inputencoding default

	* src/paragraph_pimpl.C
	(isEncoding): Explain why bparams.inputenc == "default" is ignored

	* src/bufferparams.C
	(BufferParams::encoding): Determine the encoding from the language
	for inputenc == "default"

	* src/buffer.h
	(writeLaTeXSource): Mention inputenc == "default" in documentation

	* src/bufferparams.h
	(inputenc): Update documentation of "default"

	* src/output_latex.C
	(switchEncoding): Switch the encoding also for inputenc == "default",
	but don't output \inputencoding commands in that case

	* lib/lyx2lyx/LyX.py
	(get_encoding): Determine the encoding from the language for
	inputencoding == "default"

	* lib/lyx2lyx/lyx_1_5.py
	(convert_multiencoding): ditto

	* development/FORMAT: Update documentation of \inputencoding default


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16667 a592a061-630c-0410-9148-cb99ea01b6c8
---
 development/FORMAT     |  9 ++++++---
 lib/lyx2lyx/LyX.py     |  4 ++--
 lib/lyx2lyx/lyx_1_5.py | 11 ++++++-----
 src/buffer.h           | 11 ++++++-----
 src/bufferparams.C     | 14 ++++----------
 src/bufferparams.h     | 15 +++++++++------
 src/output_latex.C     | 11 +++++++----
 src/paragraph_pimpl.C  |  4 ++++
 8 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/development/FORMAT b/development/FORMAT
index 75b69a8adc..8a54eea112 100644
--- a/development/FORMAT
+++ b/development/FORMAT
@@ -78,11 +78,14 @@ LyX file-format changes
 	encoding of the LyX file:
 
 	\inputencoding       LyX file encoding
-	auto                 as determined by the document language(s)
-	default              unspecified 8bit (treated as latin1 internally,
-	                     see comment in bufferparams.h)
+	auto                 as determined by the document and character
+	                     languages
+	default              ditto
 	everything else      as determined by \inputencoding
 
+	The difference between auto and default is only the LaTeX output:
+	auto causes loading of the inputenc package, default does not.
+
 2006-07-03  Georg Baum  <Georg.Baum@post.rwth-aachen.de>
 
 	* format incremented to 248: Basic booktabs support
diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py
index a21223dac0..b0bdbdbee8 100644
--- a/lib/lyx2lyx/LyX.py
+++ b/lib/lyx2lyx/LyX.py
@@ -112,9 +112,9 @@ def get_encoding(language, inputencoding, format):
     if format > 248:
         return "utf8"
     from lyx2lyx_lang import lang
-    if inputencoding == "auto":        
+    if inputencoding == "auto" or inputencoding == "default":
         return lang[language][3]
-    if inputencoding == "default" or inputencoding == "":
+    if inputencoding == "":
         return "latin1"
     # python does not know the alias latin9
     if inputencoding == "latin9":
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py
index dcd600d00d..8918fb386f 100644
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -219,10 +219,11 @@ def revert_booktabs(document):
 
 def convert_multiencoding(document, forward):
     """ Fix files with multiple encodings.
-Files with an inputencoding of "auto" and multiple languages where at least
-two languages have different default encodings are encoded in multiple
-encodings for file formats < 249. These files are incorrectly read and
-written (as if the whole file was in the encoding of the main language).
+Files with an inputencoding of "auto" or "default" and multiple languages
+where at least two languages have different default encodings are encoded
+in multiple encodings for file formats < 249. These files are incorrectly
+read and written (as if the whole file was in the encoding of the main
+language).
 
 This function
 - converts from fake unicode values to true unicode if forward is true, and
@@ -234,7 +235,7 @@ necessary parsing in modern formats than in ancient ones.
 """
     encoding_stack = [document.encoding]
     lang_re = re.compile(r"^\\lang\s(\S+)")
-    if document.inputencoding == "auto":
+    if document.inputencoding == "auto" or document.inputencoding == "default":
         for i in range(len(document.body)):
             result = lang_re.match(document.body[i])
             if result:
diff --git a/src/buffer.h b/src/buffer.h
index 98d7669eae..66f5f7901d 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -153,11 +153,12 @@ public:
 			   bool output_preamble = true,
 			   bool output_body = true);
 	/** Export the buffer to LaTeX.
-	    If \p os is a file stream, and params().inputenc == "auto", and
-	    the buffer contains text in different languages with more than
-	    one encoding, then this method will change the encoding
-	    associated to \p os. Therefore you must not call this method with
-	    a string stream if the output is supposed to go to a file. \code
+	    If \p os is a file stream, and params().inputenc is "auto" or
+	    "default", and the buffer contains text in different languages
+	    with more than one encoding, then this method will change the
+	    encoding associated to \p os. Therefore you must not call this
+	    method with a string stream if the output is supposed to go to a
+	    file. \code
 	    odocfstream ofs;
 	    ofs.open("test.tex");
 	    writeLaTeXSource(ofs, ...);
diff --git a/src/bufferparams.C b/src/bufferparams.C
index 959d087b79..dff424a9af 100644
--- a/src/bufferparams.C
+++ b/src/bufferparams.C
@@ -1466,20 +1466,14 @@ string const BufferParams::loadFonts(LaTeXFeatures & features, string const & rm
 
 Encoding const & BufferParams::encoding() const
 {
-	if (inputenc == "auto")
+	if (inputenc == "auto" || inputenc == "default")
 		return *(language->encoding());
-	Encoding const * const enc = (inputenc == "default") ?
-		encodings.getFromLyXName("iso8859-1") :
+	Encoding const * const enc =
 		encodings.getFromLaTeXName(inputenc);
 	if (enc)
 		return *enc;
-	if (inputenc == "default")
-		lyxerr << "Could not find iso8859-1 encoding for inputenc "
-		          "value `default'. Using inputenc `auto' instead."
-		       << endl;
-	else
-		lyxerr << "Unknown inputenc value `" << inputenc
-		       << "'. Using `auto' instead." << endl;
+	lyxerr << "Unknown inputenc value `" << inputenc
+	       << "'. Using `auto' instead." << endl;
 	return *(language->encoding());
 }
 
diff --git a/src/bufferparams.h b/src/bufferparams.h
index 28599a93ac..99d4007765 100644
--- a/src/bufferparams.h
+++ b/src/bufferparams.h
@@ -178,15 +178,18 @@ public:
 	BranchList const & branchlist() const;
 	/**
 	 * The input encoding for LaTeX. This can be one of
-	 * - auto: find out the input encoding from the used languages
-	 * - default: Don't load the inputenc package and hope that it will
-	 *   work (unlikely). The encoding is an unspecified 8bit encoding,
-	 *   the interpretation is up to the LaTeX compiler. Because we need
-	 *   a rule how to create this from our internal UCS4 encoded
-	 *   document contents we treat this as latin1 internally.
+	 * - \c auto: find out the input encoding from the used languages
+	 * - \c default: ditto
 	 * - any encoding supported by the inputenc package
 	 * The encoding of the LyX file is always utf8 and has nothing to
 	 * do with this setting.
+	 * The difference between \c auto and \c default is that \c auto also
+	 * causes loading of the inputenc package, while \c default does not.
+	 * \c default will not work unless the user takes additional measures
+	 * (such as using special environments like the CJK environment from
+	 * CJK.sty).
+	 * \c default can be seen as an unspecified 8bit encoding, since LyX
+	 * does not interpret it in any way apart from display on screen.
 	 */
 	std::string inputenc;
 	/// The main encoding used by this buffer for LaTeX output.
diff --git a/src/output_latex.C b/src/output_latex.C
index 59a69511c5..383da47b2b 100644
--- a/src/output_latex.C
+++ b/src/output_latex.C
@@ -600,15 +600,18 @@ int switchEncoding(odocstream & os, BufferParams const & bparams,
 	// ignore switches from/to tis620-0 encoding here. This does of
 	// course only work as long as the non-thai text contains ASCII
 	// only, but it is the best we can do.
-	if (bparams.inputenc == "auto" && oldEnc.name() != newEnc.name() &&
+	if ((bparams.inputenc == "auto" || bparams.inputenc == "default") &&
+	    oldEnc.name() != newEnc.name() &&
 	    oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0") {
 		lyxerr[Debug::LATEX] << "Changing LaTeX encoding from "
 		                     << oldEnc.name() << " to "
 		                     << newEnc.name() << endl;
 		os << setEncoding(newEnc.iconvName());
-		docstring const inputenc(from_ascii(newEnc.latexName()));
-		os << "\\inputencoding{" << inputenc << '}';
-		return 16 + inputenc.length();
+		if (bparams.inputenc != "default") {
+			docstring const inputenc(from_ascii(newEnc.latexName()));
+			os << "\\inputencoding{" << inputenc << '}';
+			return 16 + inputenc.length();
+		}
 	}
 	return 0;
 }
diff --git a/src/paragraph_pimpl.C b/src/paragraph_pimpl.C
index 1a8cbfef35..0a3b64715b 100644
--- a/src/paragraph_pimpl.C
+++ b/src/paragraph_pimpl.C
@@ -62,6 +62,10 @@ size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
 bool isEncoding(BufferParams const & bparams, LyXFont const & font,
 		string const & encoding)
 {
+	// We deliberately ignore bparams.inputenc == "default" here because
+	// characters in this encoding could be treated by TeX as something
+	// different, e.g. if they are inside a CJK environment. See also
+	// http://bugzilla.lyx.org/show_bug.cgi?id=3043.
 	return (bparams.inputenc == encoding
 		|| (bparams.inputenc == "auto"
 		    && font.language()->encoding()->latexName() == encoding));
-- 
2.39.2