char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref",
"vpageref", "formatted", "eqref", 0 };
+/**
+ * Known polyglossia language names (including variants).
+ * The list is terminated by a 0 entry. coded_polyglossia_languages holds
+ * the corresponding .lyx names in exactly the same order, so an entry added,
+ * removed or moved here must be mirrored there at the same position.
+ */
+const char * const polyglossia_languages[] = {
+"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
+"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil",
+"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
+"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish",
+"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
+"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil",
+"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french",
+"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
+"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek",
+"marathi", "spanish",
+"american", "ancient", "australian", "british", "monotonic", "newzealand",
+"polytonic", 0};
+
+/**
+ * The same as polyglossia_languages with .lyx names: the entry at a given
+ * position here is the .lyx name of the polyglossia name at the same
+ * position there. Several polyglossia names may share one .lyx name
+ * (e.g. both "brazil" and "brazilian" are listed as "brazilian").
+ * The list is terminated by a 0 entry.
+ * Please keep this in sync with polyglossia_languages line by line!
+ */
+const char * const coded_polyglossia_languages[] = {
+"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
+"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil",
+"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
+"irish", "portuges", "thai", "bahasa", "english", "italian", "romanian", "turkish",
+"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
+"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian",
+"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french",
+"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
+"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek",
+"marathi", "spanish",
+"american", "ancientgreek", "australian", "british", "greek", "newzealand",
+"polutonikogreek", 0};
+
+/**
+ * Supported CJK encodings, i.e. the encoding arguments of a CJK environment
+ * for which coded_supported_CJK_encodings provides a LyX language name.
+ * The list is terminated by a 0 entry.
+ */
+const char * const supported_CJK_encodings[] = {
+"EUC-JP", "KS", "GB", "UTF8", 0};
+
+/**
+ * The same as supported_CJK_encodings with their corresponding LyX language name,
+ * in the same order: "EUC-JP" is "japanese-cjk", "KS" is "korean",
+ * "GB" is "chinese-simplified" and "UTF8" is "chinese-traditional".
+ * The list is terminated by a 0 entry.
+ * Please keep this in sync with supported_CJK_encodings line by line!
+ */
+const char * const coded_supported_CJK_encodings[] = {
+"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0};
+
+/**
+ * Translate a CJK encoding name into the LyX language name listed for it.
+ * \param encoding the encoding name to look up, e.g. "EUC-JP"
+ * \return the entry of coded_supported_CJK_encodings at the position where
+ *         \p encoding is found in supported_CJK_encodings, or \p encoding
+ *         itself if it is not found there
+ */
+string CJK2lyx(string const & encoding)
+{
+ char const * const * const hit = is_known(encoding, supported_CJK_encodings);
+ // unknown encodings are handed back unchanged
+ if (!hit)
+ return encoding;
+ // both arrays are parallel, so the offset into one indexes the other
+ return coded_supported_CJK_encodings[hit - supported_CJK_encodings];
+}
+
/*!
* natbib commands.
* The starred forms are also known except for "citefullauthor",
continue;
}
s = s.substr(i);
+ bool termination;
docstring rem;
set<string> req;
docstring parsed = encodings.fromLaTeXCommand(s,
- Encodings::TEXT_CMD, rem, &req);
- for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+ Encodings::TEXT_CMD, termination, rem, &req);
+ set<string>::const_iterator it = req.begin();
+ set<string>::const_iterator en = req.end();
+ for (; it != en; ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
os << parsed;
s = rem;
void parse_environment(Parser & p, ostream & os, bool outer,
- string & last_env, bool & title_layout_found,
- Context & parent_context)
+ string & last_env, Context & parent_context)
{
Layout const * newlayout;
InsetLayout const * newinsetlayout = 0;
}
}
+ else if (is_known(name, polyglossia_languages)) {
+ // We must begin a new paragraph if not already done
+ if (! parent_context.atParagraphStart()) {
+ parent_context.check_end_layout(os);
+ parent_context.new_paragraph(os);
+ }
+ // save the language in the context so that it is
+ // handled by parse_text
+ parent_context.font.language = polyglossia2lyx(name);
+ parse_text(p, os, FLAG_END, outer, parent_context);
+ // Just in case the environment is empty
+ parent_context.extra_stuff.erase();
+ // We must begin a new paragraph to reset the language
+ parent_context.new_paragraph(os);
+ p.skip_spaces();
+ }
+
else if (unstarred_name == "tabular" || name == "longtable") {
eat_whitespace(p, os, parent_context, false);
string width = "0pt";
os << "\n\\begin_layout Standard\n";
}
+ else if (name == "CJK") {
+ // The scheme is \begin{CJK}{encoding}{mapping} text \end{CJK}.
+ // It is impossible to decide if a CJK environment was in its own paragraph or within
+ // a line. We therefore always assume a paragraph since the latter is a rare case.
+ eat_whitespace(p, os, parent_context, false);
+ parent_context.check_end_layout(os);
+ // store the encoding to be able to reset it
+ string const encoding_old = p.encoding_latex_;
+ string const encoding = p.getArg('{', '}');
+ // SJIS and BIG5 don't work with LaTeX according to the comment in unicode.cpp
+ // JIS does not work with LyX's encoding conversion
+ if (encoding != "SJIS" && encoding != "BIG5" && encoding != "JIS")
+ p.setEncoding(encoding);
+ else
+ p.setEncoding("utf8");
+ // LyX doesn't support the second argument so if
+ // this is used we need to output everything as ERT.
+ // The same is done for encodings without a known LyX language.
+ string const mapping = p.getArg('{', '}');
+ if ( (!mapping.empty() && mapping != " ")
+ || (!is_known(encoding, supported_CJK_encodings))) {
+ parent_context.check_layout(os);
+ handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}",
+ parent_context);
+ // we must parse the content as verbatim because e.g. SJIS can contain
+ // normally invalid characters
+ string const s = p.plainEnvironment("CJK");
+ // backslash and dollar are passed through handle_ert, every other
+ // character of the environment content is written as it is
+ for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+ if (*it == '\\')
+ handle_ert(os, "\\", parent_context);
+ else if (*it == '$')
+ handle_ert(os, "$", parent_context);
+ else
+ os << *it;
+ }
+ p.skip_spaces();
+ handle_ert(os, "\\end{" + name + "}",
+ parent_context);
+ } else {
+ string const lang = CJK2lyx(encoding);
+ // store the language because we must reset it at the end
+ string const lang_old = parent_context.font.language;
+ parent_context.font.language = lang;
+ parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
+ parent_context.font.language = lang_old;
+ parent_context.new_paragraph(os);
+ }
+ // NOTE(review): the encoding was switched with p.setEncoding() above but
+ // is restored here by assigning the stored name only. Please confirm
+ // that this also resets the input conversion, otherwise
+ // p.setEncoding(encoding_old) is probably what is needed.
+ p.encoding_latex_ = encoding_old;
+ p.skip_spaces();
+ }
+
else if (name == "lyxgreyedout") {
eat_whitespace(p, os, parent_context, false);
parent_context.check_layout(os);
context.check_end_deeper(os);
parent_context.new_paragraph(os);
p.skip_spaces();
- if (!title_layout_found)
- title_layout_found = newlayout->intitle;
+ if (!preamble.titleLayoutFound())
+ preamble.titleLayoutFound(newlayout->intitle);
set<string> const & req = newlayout->requires();
- for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+ set<string>::const_iterator it = req.begin();
+ set<string>::const_iterator en = req.end();
+ for (; it != en; ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
}
bool const use_natbib = preamble.isPackageUsed("natbib");
bool const use_jurabib = preamble.isPackageUsed("jurabib");
string last_env;
- bool title_layout_found = false;
while (p.good()) {
Token const & t = p.get_token();
+ // It is impossible to determine the correct document language if CJK is used.
+ // Therefore write a note at the beginning of the document. The flag is
+ // cleared afterwards so that the note is written only once.
+ if (have_CJK) {
+ context.check_layout(os);
+ begin_inset(os, "Note Note\n");
+ os << "status open\n\\begin_layout Plain Layout\n"
+ << "\\series bold\n"
+ << "Important information:\n"
+ << "\\end_layout\n\n"
+ << "\\begin_layout Plain Layout\n"
+ << "This document contains text in Chinese, Japanese or Korean.\n"
+ << " It was therefore impossible for tex2lyx to set the correct document language for your document."
+ << " Please set it in the document settings by yourself!\n"
+ << "\\end_layout\n";
+ end_inset(os);
+ have_CJK = false;
+ }
+
+ // it is impossible to determine the correct encoding for non-CJK Japanese.
+ // Therefore write a note at the beginning of the document
+ if (is_nonCJKJapanese) {
+ context.check_layout(os);
+ begin_inset(os, "Note Note\n");
+ os << "status open\n\\begin_layout Plain Layout\n"
+ << "\\series bold\n"
+ << "Important information:\n"
+ << "\\end_layout\n\n"
+ << "\\begin_layout Plain Layout\n"
+ << "This document is in Japanese (non-CJK).\n"
+ << " It was therefore impossible for tex2lyx to determine the correct encoding."
+ << " The encoding EUC-JP was assumed. If this is incorrect, please set the correct"
+ << " encoding in the document settings.\n"
+ << "\\end_layout\n";
+ end_inset(os);
+ is_nonCJKJapanese = false;
+ }
+
#ifdef FILEDEBUG
debugToken(cerr, t, flags);
#endif
else if (t.cs() == "begin")
parse_environment(p, os, outer, last_env,
- title_layout_found, context);
+ context);
else if (t.cs() == "end") {
if (flags & FLAG_END) {
// Must catch empty dates before findLayout is called below
else if (t.cs() == "date") {
+ eat_whitespace(p, os, context, false);
+ p.pushPosition();
string const date = p.verbatim_item();
- if (date.empty())
+ p.popPosition();
+ if (date.empty()) {
preamble.suppressDate(true);
- else {
+ p.verbatim_item();
+ } else {
preamble.suppressDate(false);
if (context.new_layout_allowed &&
(newlayout = findLayout(context.textclass,
// write the layout
output_command_layout(os, p, outer,
context, newlayout);
- p.skip_spaces();
- if (!title_layout_found)
- title_layout_found = newlayout->intitle;
+ parse_text_snippet(p, os, FLAG_ITEM, outer, context);
+ if (!preamble.titleLayoutFound())
+ preamble.titleLayoutFound(newlayout->intitle);
set<string> const & req = newlayout->requires();
- for (set<string>::const_iterator it = req.begin();
- it != req.end(); it++)
+ set<string>::const_iterator it = req.begin();
+ set<string>::const_iterator en = req.end();
+ for (; it != en; ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
} else
- handle_ert(os, "\\date{" + date + '}',
- context);
+ handle_ert(os,
+ "\\date{" + p.verbatim_item() + '}',
+ context);
}
}
p.get_token();
output_command_layout(os, p, outer, context, newlayout);
p.skip_spaces();
- if (!title_layout_found)
- title_layout_found = newlayout->intitle;
+ if (!preamble.titleLayoutFound())
+ preamble.titleLayoutFound(newlayout->intitle);
set<string> const & req = newlayout->requires();
- for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+ for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
}
// write the layout
output_command_layout(os, p, outer, context, newlayout);
p.skip_spaces();
- if (!title_layout_found)
- title_layout_found = newlayout->intitle;
+ if (!preamble.titleLayoutFound())
+ preamble.titleLayoutFound(newlayout->intitle);
set<string> const & req = newlayout->requires();
- for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+ for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
}
}
else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
- if (title_layout_found) {
+ if (preamble.titleLayoutFound()) {
// swallow this
skip_spaces_braces(p);
} else
handle_ert(os, t.asInput(), context);
}
- else if (t.cs() == "tableofcontents") {
+ else if (t.cs() == "tableofcontents" || t.cs() == "lstlistoflistings") {
context.check_layout(os);
- begin_command_inset(os, "toc", "tableofcontents");
+ begin_command_inset(os, "toc", t.cs());
end_inset(os);
skip_spaces_braces(p);
+ if (t.cs() == "lstlistoflistings")
+ preamble.registerAutomaticallyLoadedPackage("listings");
}
else if (t.cs() == "listoffigures") {
context.check_layout(os);
// save the language for the case that a
// \foreignlanguage is used
-
context.font.language = babel2lyx(p.verbatim_item());
os << "\n\\lang " << context.font.language << "\n";
}
context, "\\lang",
context.font.language, lang);
}
+
+ else if (t.cs().size() > 4 && t.cs().compare(0, 4, "text") == 0
+ && is_known(t.cs().substr(4), polyglossia_languages)) {
+ // scheme is \textLANGUAGE{text} where LANGUAGE is in polyglossia_languages[]
+ // The length must be tested before substr(4) is called: substr throws
+ // std::out_of_range if the control sequence has fewer than four
+ // characters. The "text" prefix is tested so that a command which
+ // merely ends in a language name is not taken for a language switch.
+ string lang;
+ // We have to output the whole command if it has an option
+ // because LyX doesn't support this yet, see bug #8214,
+ // only if there is a single option specifying a variant, we can handle it.
+ if (p.hasOpt()) {
+ string langopts = p.getOpt();
+ // check if the option contains a variant, if yes, extract it
+ string::size_type pos_var = langopts.find("variant");
+ string::size_type i = langopts.find(',');
+ if (pos_var != string::npos && i == string::npos) {
+ // a single option: skip "variant=" (8 characters) and drop the
+ // last character of the option string
+ // (assumes langopts looks like "[variant=NAME]" - TODO confirm)
+ string const variant = langopts.substr(pos_var + 8, langopts.length() - pos_var - 9);
+ lang = polyglossia2lyx(variant);
+ parse_text_attributes(p, os, FLAG_ITEM, outer,
+ context, "\\lang",
+ context.font.language, lang);
+ } else
+ // no variant or more than one option: keep command and options as ERT
+ handle_ert(os, t.asInput() + langopts, context);
+ } else {
+ lang = polyglossia2lyx(t.cs().substr(4));
+ parse_text_attributes(p, os, FLAG_ITEM, outer,
+ context, "\\lang",
+ context.font.language, lang);
+ }
+ }
else if (t.cs() == "inputencoding") {
// nothing to write here
&& contains("\"'.=^`bcdHkrtuv~", t.cs())) {
context.check_layout(os);
// try to see whether the string is in unicodesymbols
+ bool termination;
docstring rem;
string command = t.asInput() + "{"
+ trimSpaceAndEol(p.verbatim_item())
+ "}";
set<string> req;
docstring s = encodings.fromLaTeXCommand(from_utf8(command),
- Encodings::TEXT_CMD | Encodings::MATH_CMD, rem, &req);
+ Encodings::TEXT_CMD | Encodings::MATH_CMD,
+ termination, rem, &req);
if (!s.empty()) {
if (!rem.empty())
cerr << "When parsing " << command
<< ", result is " << to_utf8(s)
<< "+" << to_utf8(rem) << endl;
os << to_utf8(s);
- for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+ for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
} else
// we did not find a non-ert version
// try to see whether the string is in unicodesymbols
// Only use text mode commands, since we are in text mode here,
// and math commands may be invalid (bug 6797)
+ bool termination;
docstring rem;
set<string> req;
docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()),
- Encodings::TEXT_CMD, rem, &req);
+ Encodings::TEXT_CMD, termination, rem, &req);
if (!s.empty()) {
if (!rem.empty())
cerr << "When parsing " << t.cs()
<< "+" << to_utf8(rem) << endl;
context.check_layout(os);
os << to_utf8(s);
- skip_spaces_braces(p);
- for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+ if (termination)
+ skip_spaces_braces(p);
+ for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
preamble.registerAutomaticallyLoadedPackage(*it);
}
//cerr << "#: " << t << " mode: " << mode << endl;