namespace {
-// "chinese-simplified", "chinese-traditional", "japanese-cjk", "korean"
-// cannot be supported because it is impossible to determine the correct document
-// language if CJK is used.
+// CJK languages are handled in text.cpp; polyglossia languages are listed
+// further down.
/**
* known babel language names (including synonyms)
* not in standard babel: arabic, arabtex, armenian, belarusian, serbian-latin, thai
"uppersorbian", "uppersorbian", "english", "english", "vietnamese", "welsh",
0};
+/// languages with danish quotes (.lyx names)
+const char * const known_danish_quotes_languages[] = {"danish", 0};
+
/// languages with english quotes (.lyx names)
const char * const known_english_quotes_languages[] = {"american", "australian",
"bahasa", "bahasam", "brazilian", "canadian", "chinese-simplified", "english",
vector<string> options = split_options(opts);
add_package(name, options);
string scale;
+ char const * const * where = 0;
if (is_known(name, known_xetex_packages)) {
xetex = true;
}
else if (name == "CJK") {
- // It is impossible to determine the document language if CJK is used.
- // All we can do is to notify the user that he has to set this by himself.
- have_CJK = true;
// set the encoding to "auto" because it might be set to "default" by the babel handling
// and this would not be correct for CJK
if (h_inputencoding == "default")
else if (name == "subfig")
; // ignore this FIXME: Use the package separator mechanism instead
- else if (is_known(name, known_languages))
- h_language = name;
+ else if ((where = is_known(name, known_languages)))
+ h_language = known_coded_languages[where - known_languages];
else if (name == "natbib") {
h_biblio_style = "plainnat";
// http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage
// (quotes for kazakh and interlingua are unknown)
// danish
- if (h_language == "danish")
+ if (is_known(h_language, known_danish_quotes_languages))
h_quotes_language = "danish";
// french
else if (is_known(h_language, known_french_quotes_languages))
ss << tc.sides();
h_papersides = ss.str();
}
+
+ // If the CJK package is used we cannot set the document language from
+ // the babel options. Instead, we guess which language is used most
+ // and set this one.
+ default_language = h_language;
+ if (is_full_document && auto_packages.find("CJK") != auto_packages.end()) {
+ p.pushPosition();
+ h_language = guessLanguage(p, default_language);
+ p.popPosition();
+ }
}
* the same as supported_CJK_encodings with their corresponding LyX language name
* please keep this in sync with supported_CJK_encodings line by line!
*/
-const char * const coded_supported_CJK_encodings[] = {
+const char * const supported_CJK_languages[] = {
"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0};
-string CJK2lyx(string const & encoding)
-{
- char const * const * where = is_known(encoding, supported_CJK_encodings);
- if (where)
- return coded_supported_CJK_encodings[where - supported_CJK_encodings];
- return encoding;
-}
-
/*!
* natbib commands.
* The starred forms are also known except for "citefullauthor",
// LyX doesn't support the second argument so if
// this is used we need to output everything as ERT
string const mapping = p.getArg('{', '}');
- if ((!mapping.empty() && mapping != " ")
- || (!is_known(encoding, supported_CJK_encodings))) {
+ char const * const * const where =
+ is_known(encoding, supported_CJK_encodings);
+ if ((!mapping.empty() && mapping != " ") || !where) {
parent_context.check_layout(os);
handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}",
parent_context);
handle_ert(os, "\\end{" + name + "}",
parent_context);
} else {
- string const lang = CJK2lyx(encoding);
+ string const lang =
+ supported_CJK_languages[where - supported_CJK_encodings];
// store the language because we must reset it at the end
string const lang_old = parent_context.font.language;
parent_context.font.language = lang;
while (p.good()) {
Token const & t = p.get_token();
- // it is impossible to determine the correct document language if CJK is used.
- // Therefore write a note at the beginning of the document
- if (have_CJK) {
- context.check_layout(os);
- begin_inset(os, "Note Note\n");
- os << "status open\n\\begin_layout Plain Layout\n"
- << "\\series bold\n"
- << "Important information:\n"
- << "\\end_layout\n\n"
- << "\\begin_layout Plain Layout\n"
- << "This document contains text in Chinese, Japanese or Korean.\n"
- << " It was therefore impossible for tex2lyx to set the correct document language for your document."
- << " Please set the language manually in the document settings.\n"
- << "\\end_layout\n";
- end_inset(os);
- have_CJK = false;
- }
-
// it is impossible to determine the correct encoding for non-CJK Japanese.
// Therefore write a note at the beginning of the document
if (is_nonCJKJapanese) {
}
}
+
+/*!
+ * Guess the main language of the text that \p p has not yet delivered,
+ * by counting characters per language.
+ *
+ * Text inside a CJK environment whose encoding is listed in
+ * supported_CJK_encodings is counted for the matching entry of
+ * supported_CJK_languages. Every other token that is neither a comment
+ * nor a command is counted for \p lang. Comments, commands and CJK
+ * environments with an unknown encoding are not counted at all.
+ *
+ * The parser is read until p.good() fails, and inputencoding commands
+ * met on the way change its encoding. Neither the position nor that
+ * encoding is restored here.
+ *
+ * \param p    parser to read the text from
+ * \param lang language that all non-CJK contents are attributed to
+ * \return the language with the highest count; \p lang wins ties
+ */
+string guessLanguage(Parser & p, string const & lang)
+{
+	typedef std::map<std::string, size_t> LangMap;
+	// map from language names to number of characters
+	LangMap used;
+	used[lang] = 0;
+	for (char const * const * i = supported_CJK_languages; *i; ++i)
+		used[string(*i)] = 0;
+
+	while (p.good()) {
+		Token const t = p.get_token();
+		// comments are not counted for any language
+		if (t.cat() == catComment)
+			continue;
+		// commands are not counted as well, but we need to detect
+		// \begin{CJK} and switch encoding if needed
+		if (t.cat() == catEscape) {
+			if (t.cs() == "inputencoding") {
+				string const enc = subst(p.verbatim_item(), "\n", " ");
+				p.setEncoding(enc);
+				continue;
+			}
+			if (t.cs() != "begin")
+				continue;
+		} else {
+			// Non-CJK content is counted for lang.
+			// We do not care about the real language here:
+			// If we have more non-CJK contents than CJK contents,
+			// we simply use the language that was specified as
+			// babel main language.
+			used[lang] += t.asInput().length();
+			continue;
+		}
+		// Now we are starting an environment. Peek at its name and
+		// rewind, so that a non-CJK environment is scanned normally.
+		p.pushPosition();
+		string const name = p.getArg('{', '}');
+		if (name != "CJK") {
+			p.popPosition();
+			continue;
+		}
+		// It is a CJK environment: rewind and consume all arguments
+		p.popPosition();
+		/* name = */ p.getArg('{', '}');
+		string const encoding = p.getArg('{', '}');
+		/* mapping = */ p.getArg('{', '}');
+		// read the environment contents in its own encoding and
+		// switch back to the previous encoding afterwards
+		string const encoding_old = p.getEncoding();
+		char const * const * const where =
+			is_known(encoding, supported_CJK_encodings);
+		if (where)
+			p.setEncoding(encoding);
+		else
+			p.setEncoding("utf8");
+		string const text = p.verbatimEnvironment("CJK");
+		p.setEncoding(encoding_old);
+		p.skip_spaces();
+		if (!where) {
+			// ignore contents in unknown CJK encoding
+			continue;
+		}
+		// the language of the text
+		string const cjk =
+			supported_CJK_languages[where - supported_CJK_encodings];
+		used[cjk] += text.length();
+	}
+	// Start from the entry for lang (always present, it was inserted
+	// above) and only replace it by a strictly larger count, so that
+	// lang wins ties, in particular when nothing was counted at all.
+	// Starting from used.begin() would let the smallest key in map
+	// order win a tie instead.
+	LangMap::const_iterator use = used.find(lang);
+	for (LangMap::const_iterator it = used.begin(); it != used.end(); ++it) {
+		if (it->second > use->second)
+			use = it;
+	}
+	return use->first;
+}
+
// }])