X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2Ftext.cpp;h=50eee93a3d2291596ee63140d7afac665298232f;hb=8e7b5eebf0ef9742e5f6983d8f55f563c12a31ab;hp=e57784fc0782f7963716993b14b0be2076528ef1;hpb=05446b232f7716fecdea26af61b3fa1de0274c4d;p=lyx.git diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index e57784fc07..50eee93a3d 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -117,6 +117,62 @@ char const * const known_ref_commands[] = { "ref", "pageref", "vref", char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref", "vpageref", "formatted", "eqref", 0 }; +/** + * known polyglossia language names (including variants) + */ +const char * const polyglossia_languages[] = { +"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi", +"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil", +"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch", +"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish", +"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin", +"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil", +"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french", +"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak", +"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek", +"marathi", "spanish", +"american", "ancient", "australian", "british", "monotonic", "newzealand", +"polytonic", 0}; + +/** + * the same as polyglossia_languages with .lyx names + * please keep this in sync with polyglossia_languages line by line! + */ +const char * const coded_polyglossia_languages[] = { +"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi", +"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil", +"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch", +"irish", "portuges", "thai", "bahasa", "english", "italian", "romanian", "turkish", +"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin", +"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian", +"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french", +"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak", +"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek", +"marathi", "spanish", +"american", "ancientgreek", "australian", "british", "greek", "newzealand", +"polutonikogreek", 0}; + +/** + * supported CJK encodings + */ +const char * const supported_CJK_encodings[] = { +"EUC-JP", "KS", "GB", "UTF8", 0}; + +/** + * the same as supported_CJK_encodings with their corresponding LyX language name + * please keep this in sync with supported_CJK_encodings line by line! + */ +const char * const coded_supported_CJK_encodings[] = { +"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0}; + +string CJK2lyx(string const & encoding) +{ + char const * const * where = is_known(encoding, supported_CJK_encodings); + if (where) + return coded_supported_CJK_encodings[where - supported_CJK_encodings]; + return encoding; +} + /*! * natbib commands. * The starred forms are also known except for "citefullauthor", @@ -456,11 +512,14 @@ docstring convert_unicodesymbols(docstring s) continue; } s = s.substr(i); + bool termination; docstring rem; set req; docstring parsed = encodings.fromLaTeXCommand(s, - Encodings::TEXT_CMD, rem, &req); - for (set::const_iterator it = req.begin(); it != req.end(); it++) + Encodings::TEXT_CMD, termination, rem, &req); + set::const_iterator it = req.begin(); + set::const_iterator en = req.end(); + for (; it != en; ++it) preamble.registerAutomaticallyLoadedPackage(*it); os << parsed; s = rem; @@ -1118,16 +1177,28 @@ void parse_outer_box(Parser & p, ostream & os, unsigned flags, bool outer, } -void parse_listings(Parser & p, ostream & os, Context & parent_context) +void parse_listings(Parser & p, ostream & os, Context & parent_context, bool in_line) { parent_context.check_layout(os); begin_inset(os, "listings\n"); - os << "inline false\n" - << "status collapsed\n"; + if (p.hasOpt()) { + string arg = p.verbatimOption(); + os << "lstparams " << '"' << arg << '"' << '\n'; + } + if (in_line) + os << "inline true\n"; + else + os << "inline false\n"; + os << "status collapsed\n"; Context context(true, parent_context.textclass); context.layout = &parent_context.textclass.plainLayout(); - context.check_layout(os); - string const s = p.verbatimEnvironment("lstlisting"); + string s; + if (in_line) { + s = p.plainCommand('!', '!', "lstinline"); + context.new_paragraph(os); + context.check_layout(os); + } else + s = p.plainEnvironment("lstlisting"); for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { if (*it == '\\') os << "\n\\backslash\n"; @@ -1173,8 +1244,7 @@ void parse_unknown_environment(Parser & p, string const & name, ostream & os, void parse_environment(Parser & p, ostream & os, bool outer, - string & last_env, bool & title_layout_found, - Context & parent_context) + string & last_env, Context & parent_context) { Layout const * newlayout; InsetLayout const * newinsetlayout = 0; @@ -1198,6 +1268,23 @@ void parse_environment(Parser & p, ostream & os, bool outer, } } + else if (is_known(name, polyglossia_languages)) { + // We must begin a new paragraph if not already done + if (! parent_context.atParagraphStart()) { + parent_context.check_end_layout(os); + parent_context.new_paragraph(os); + } + // save the language in the context so that it is + // handled by parse_text + parent_context.font.language = polyglossia2lyx(name); + parse_text(p, os, FLAG_END, outer, parent_context); + // Just in case the environment is empty + parent_context.extra_stuff.erase(); + // We must begin a new paragraph to reset the language + parent_context.new_paragraph(os); + p.skip_spaces(); + } + else if (unstarred_name == "tabular" || name == "longtable") { eat_whitespace(p, os, parent_context, false); string width = "0pt"; @@ -1267,7 +1354,7 @@ void parse_environment(Parser & p, ostream & os, bool outer, // we must make sure that the next item gets a \begin_layout. parent_context.new_paragraph(os); p.skip_spaces(); - preamble.registerAutomaticallyLoadedPackage("rotfloat"); + preamble.registerAutomaticallyLoadedPackage("rotfloat"); } else if (name == "wrapfigure" || name == "wraptable") { @@ -1300,7 +1387,7 @@ void parse_environment(Parser & p, ostream & os, bool outer, // we must make sure that the next item gets a \begin_layout. parent_context.new_paragraph(os); p.skip_spaces(); - preamble.registerAutomaticallyLoadedPackage("wrapfig"); + preamble.registerAutomaticallyLoadedPackage("wrapfig"); } else if (name == "minipage") { @@ -1344,28 +1431,80 @@ void parse_environment(Parser & p, ostream & os, bool outer, } else if (name == "verbatim") { - eat_whitespace(p, os, parent_context, false); - os << "\n\\begin_layout Verbatim\n"; - string const s = p.verbatimEnvironment("verbatim"); + os << "\n\\end_layout\n\n\\begin_layout Verbatim\n"; + string const s = p.plainEnvironment("verbatim"); string::const_iterator it2 = s.begin(); for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { - if (*it == '\n') { + if (*it == '\\') + os << "\\backslash "; + else if (*it == '\n') { it2 = it + 1; // avoid adding an empty paragraph at the end - // if there are 2 consecutive spaces at the end ignore it + // FIXME: if there are 2 consecutive spaces at the end ignore it // because LyX will re-add a \n + // This hack must be removed once bug 8049 is fixed! if ((it + 1 != et) && (it + 2 != et || *it2 != '\n')) os << "\n\\end_layout\n\\begin_layout Verbatim\n"; - } else + } else os << *it; } os << "\n\\end_layout\n\n"; p.skip_spaces(); - skip_braces(p); // eat {} that might by set by LyX behind comments // reset to Standard layout os << "\n\\begin_layout Standard\n"; } + else if (name == "CJK") { + // the scheme is \begin{CJK}{encoding}{mapping}{text} + // It is impossible to decide if a CJK environment was in its own paragraph or within + // a line. We therefore always assume a paragraph since the latter is a rare case. + eat_whitespace(p, os, parent_context, false); + parent_context.check_end_layout(os); + // store the encoding to be able to reset it + string const encoding_old = p.encoding_latex_; + string const encoding = p.getArg('{', '}'); + // SJIS and BIG5 don't work with LaTeX according to the comment in unicode.cpp + // JIS does not work with LyX's encoding conversion + if (encoding != "SJIS" && encoding != "BIG5" && encoding != "JIS") + p.setEncoding(encoding); + else + p.setEncoding("utf8"); + // LyX doesn't support the second argument so if + // this is used we need to output everything as ERT + string const mapping = p.getArg('{', '}'); + if ( (!mapping.empty() && mapping != " ") + || (!is_known(encoding, supported_CJK_encodings))) { + parent_context.check_layout(os); + handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}", + parent_context); + // we must parse the content as verbatim because e.g. SJIS can contain + // normally invalid characters + string const s = p.plainEnvironment("CJK"); + string::const_iterator it2 = s.begin(); + for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { + if (*it == '\\') + handle_ert(os, "\\", parent_context); + else if (*it == '$') + handle_ert(os, "$", parent_context); + else + os << *it; + } + p.skip_spaces(); + handle_ert(os, "\\end{" + name + "}", + parent_context); + } else { + string const lang = CJK2lyx(encoding); + // store the language because we must reset it at the end + string const lang_old = parent_context.font.language; + parent_context.font.language = lang; + parse_text_in_inset(p, os, FLAG_END, outer, parent_context); + parent_context.font.language = lang_old; + parent_context.new_paragraph(os); + } + p.encoding_latex_ = encoding_old; + p.skip_spaces(); + } + else if (name == "lyxgreyedout") { eat_whitespace(p, os, parent_context, false); parent_context.check_layout(os); @@ -1386,14 +1525,9 @@ void parse_environment(Parser & p, ostream & os, bool outer, else if (name == "lstlisting") { eat_whitespace(p, os, parent_context, false); - // FIXME handle listings with parameters - // If this is added, don't forgot to handle the - // automatic color package loading - if (p.hasOpt()) - parse_unknown_environment(p, name, os, FLAG_END, - outer, parent_context); - else - parse_listings(p, os, parent_context); + // FIXME handle the automatic color package loading + // uwestoehr asks: In what case color is loaded? + parse_listings(p, os, parent_context, false); p.skip_spaces(); } @@ -1545,10 +1679,12 @@ void parse_environment(Parser & p, ostream & os, bool outer, context.check_end_deeper(os); parent_context.new_paragraph(os); p.skip_spaces(); - if (!title_layout_found) - title_layout_found = newlayout->intitle; + if (!preamble.titleLayoutFound()) + preamble.titleLayoutFound(newlayout->intitle); set const & req = newlayout->requires(); - for (set::const_iterator it = req.begin(); it != req.end(); it++) + set::const_iterator it = req.begin(); + set::const_iterator en = req.end(); + for (; it != en; ++it) preamble.registerAutomaticallyLoadedPackage(*it); } @@ -1962,10 +2098,46 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, bool const use_natbib = preamble.isPackageUsed("natbib"); bool const use_jurabib = preamble.isPackageUsed("jurabib"); string last_env; - bool title_layout_found = false; while (p.good()) { Token const & t = p.get_token(); + // it is impossible to determine the correct document language if CJK is used. + // Therefore write a note at the beginning of the document + if (have_CJK) { + context.check_layout(os); + begin_inset(os, "Note Note\n"); + os << "status open\n\\begin_layout Plain Layout\n" + << "\\series bold\n" + << "Important information:\n" + << "\\end_layout\n\n" + << "\\begin_layout Plain Layout\n" + << "This document contains text in Chinese, Japanese or Korean.\n" + << " It was therefore impossible for tex2lyx to set the correct document langue for your document." + << " Please set in the document settings by yourself!\n" + << "\\end_layout\n"; + end_inset(os); + have_CJK = false; + } + + // it is impossible to determine the correct encoding for non-CJK Japanese. + // Therefore write a note at the beginning of the document + if (is_nonCJKJapanese) { + context.check_layout(os); + begin_inset(os, "Note Note\n"); + os << "status open\n\\begin_layout Plain Layout\n" + << "\\series bold\n" + << "Important information:\n" + << "\\end_layout\n\n" + << "\\begin_layout Plain Layout\n" + << "This document is in Japanese (non-CJK).\n" + << " It was therefore impossible for tex2lyx to determine the correct encoding." + << " The encoding EUC-JP was assumed. If this is incorrect, please set the correct" + << " encoding in the document settings.\n" + << "\\end_layout\n"; + end_inset(os); + is_nonCJKJapanese = false; + } + #ifdef FILEDEBUG debugToken(cerr, t, flags); #endif @@ -2289,7 +2461,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "begin") parse_environment(p, os, outer, last_env, - title_layout_found, context); + context); else if (t.cs() == "end") { if (flags & FLAG_END) { @@ -2460,10 +2632,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, // Must catch empty dates before findLayout is called below else if (t.cs() == "date") { + eat_whitespace(p, os, context, false); + p.pushPosition(); string const date = p.verbatim_item(); - if (date.empty()) + p.popPosition(); + if (date.empty()) { preamble.suppressDate(true); - else { + p.verbatim_item(); + } else { preamble.suppressDate(false); if (context.new_layout_allowed && (newlayout = findLayout(context.textclass, @@ -2471,16 +2647,18 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, // write the layout output_command_layout(os, p, outer, context, newlayout); - p.skip_spaces(); - if (!title_layout_found) - title_layout_found = newlayout->intitle; + parse_text_snippet(p, os, FLAG_ITEM, outer, context); + if (!preamble.titleLayoutFound()) + preamble.titleLayoutFound(newlayout->intitle); set const & req = newlayout->requires(); - for (set::const_iterator it = req.begin(); - it != req.end(); it++) + set::const_iterator it = req.begin(); + set::const_iterator en = req.end(); + for (; it != en; ++it) preamble.registerAutomaticallyLoadedPackage(*it); } else - handle_ert(os, "\\date{" + date + '}', - context); + handle_ert(os, + "\\date{" + p.verbatim_item() + '}', + context); } } @@ -2493,10 +2671,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, p.get_token(); output_command_layout(os, p, outer, context, newlayout); p.skip_spaces(); - if (!title_layout_found) - title_layout_found = newlayout->intitle; + if (!preamble.titleLayoutFound()) + preamble.titleLayoutFound(newlayout->intitle); set const & req = newlayout->requires(); - for (set::const_iterator it = req.begin(); it != req.end(); it++) + for (set::const_iterator it = req.begin(); it != req.end(); ++it) preamble.registerAutomaticallyLoadedPackage(*it); } @@ -2506,10 +2684,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, // write the layout output_command_layout(os, p, outer, context, newlayout); p.skip_spaces(); - if (!title_layout_found) - title_layout_found = newlayout->intitle; + if (!preamble.titleLayoutFound()) + preamble.titleLayoutFound(newlayout->intitle); set const & req = newlayout->requires(); - for (set::const_iterator it = req.begin(); it != req.end(); it++) + for (set::const_iterator it = req.begin(); it != req.end(); ++it) preamble.registerAutomaticallyLoadedPackage(*it); } @@ -2781,6 +2959,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, end_inset(os); } + else if (t.cs() == "lstinline") { + p.skip_spaces(); + parse_listings(p, os, context, true); + } + else if (t.cs() == "ensuremath") { p.skip_spaces(); context.check_layout(os); @@ -2794,18 +2977,20 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } else if (t.cs() == "makeindex" || t.cs() == "maketitle") { - if (title_layout_found) { + if (preamble.titleLayoutFound()) { // swallow this skip_spaces_braces(p); } else handle_ert(os, t.asInput(), context); } - else if (t.cs() == "tableofcontents") { + else if (t.cs() == "tableofcontents" || t.cs() == "lstlistoflistings") { context.check_layout(os); - begin_command_inset(os, "toc", "tableofcontents"); + begin_command_inset(os, "toc", t.cs()); end_inset(os); skip_spaces_braces(p); + if (t.cs() == "lstlistoflistings") + preamble.registerAutomaticallyLoadedPackage("listings"); } else if (t.cs() == "listoffigures") { @@ -3201,12 +3386,18 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, btprint = key; } - else if (t.cs() == "index") { + else if (t.cs() == "index" || + (t.cs() == "sindex" && preamble.use_indices() == "true")) { context.check_layout(os); - begin_inset(os, "Index idx\n"); - os << "status collapsed\n"; + string const arg = (t.cs() == "sindex" && p.hasOpt()) ? + p.getArg('[', ']') : ""; + string const kind = arg.empty() ? "idx" : arg; + begin_inset(os, "Index "); + os << kind << "\nstatus collapsed\n"; parse_text_in_inset(p, os, FLAG_ITEM, false, context, "Index"); end_inset(os); + if (kind != "idx") + preamble.registerAutomaticallyLoadedPackage("splitidx"); } else if (t.cs() == "nomenclature") { @@ -3371,7 +3562,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context.check_layout(os); // save the language for the case that a // \foreignlanguage is used - context.font.language = babel2lyx(p.verbatim_item()); os << "\n\\lang " << context.font.language << "\n"; } @@ -3382,6 +3572,38 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context, "\\lang", context.font.language, lang); } + + else if (is_known(t.cs().substr(4, string::npos), polyglossia_languages)) { + // scheme is \textLANGUAGE{text} where LANGUAGE is in polyglossia_languages[] + string lang; + // We have to output the whole command if it has an option + // because LyX doesn't support this yet, see bug #8214, + // only if there is a single option specifying a variant, we can handle it. + if (p.hasOpt()) { + string langopts = p.getOpt(); + // check if the option contains a variant, if yes, extract it + string::size_type pos_var = langopts.find("variant"); + string::size_type i = langopts.find(','); + if (pos_var != string::npos){ + string variant; + if (i == string::npos) { + variant = langopts.substr(pos_var + 8, langopts.length() - pos_var - 9); + lang = polyglossia2lyx(variant); + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\lang", + context.font.language, lang); + } + else + handle_ert(os, t.asInput() + langopts, context); + } else + handle_ert(os, t.asInput() + langopts, context); + } else { + lang = polyglossia2lyx(t.cs().substr(4, string::npos)); + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\lang", + context.font.language, lang); + } + } else if (t.cs() == "inputencoding") { // nothing to write here @@ -3481,20 +3703,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, && contains("\"'.=^`bcdHkrtuv~", t.cs())) { context.check_layout(os); // try to see whether the string is in unicodesymbols + bool termination; docstring rem; string command = t.asInput() + "{" + trimSpaceAndEol(p.verbatim_item()) + "}"; set req; docstring s = encodings.fromLaTeXCommand(from_utf8(command), - Encodings::TEXT_CMD | Encodings::MATH_CMD, rem, &req); + Encodings::TEXT_CMD | Encodings::MATH_CMD, + termination, rem, &req); if (!s.empty()) { if (!rem.empty()) cerr << "When parsing " << command << ", result is " << to_utf8(s) << "+" << to_utf8(rem) << endl; os << to_utf8(s); - for (set::const_iterator it = req.begin(); it != req.end(); it++) + for (set::const_iterator it = req.begin(); it != req.end(); ++it) preamble.registerAutomaticallyLoadedPackage(*it); } else // we did not find a non-ert version @@ -4087,10 +4311,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, // try to see whether the string is in unicodesymbols // Only use text mode commands, since we are in text mode here, // and math commands may be invalid (bug 6797) + bool termination; docstring rem; set req; docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()), - Encodings::TEXT_CMD, rem, &req); + Encodings::TEXT_CMD, termination, rem, &req); if (!s.empty()) { if (!rem.empty()) cerr << "When parsing " << t.cs() @@ -4098,8 +4323,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, << "+" << to_utf8(rem) << endl; context.check_layout(os); os << to_utf8(s); - skip_spaces_braces(p); - for (set::const_iterator it = req.begin(); it != req.end(); it++) + if (termination) + skip_spaces_braces(p); + for (set::const_iterator it = req.begin(); it != req.end(); ++it) preamble.registerAutomaticallyLoadedPackage(*it); } //cerr << "#: " << t << " mode: " << mode << endl;