]> git.lyx.org Git - lyx.git/blobdiff - src/tex2lyx/text.cpp
update tex2lyx todo list
[lyx.git] / src / tex2lyx / text.cpp
index d6392db3a2ce2c7d0bbb0b28c68451402c63b8b6..50eee93a3d2291596ee63140d7afac665298232f 100644 (file)
@@ -117,6 +117,62 @@ char const * const known_ref_commands[] = { "ref", "pageref", "vref",
 char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref",
  "vpageref", "formatted", "eqref", 0 };
 
+/**
+ * Known polyglossia language names (including variants).
+ * The list is terminated by a 0 entry and is searched with is_known().
+ * Each entry has its LyX counterpart at the same position in
+ * coded_polyglossia_languages, so both lists must keep the same order.
+ */
+const char * const polyglossia_languages[] = {
+"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
+"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil",
+"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
+"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish",
+"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
+"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil",
+"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french",
+"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
+"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek",
+"marathi", "spanish",
+"american", "ancient", "australian", "british", "monotonic", "newzealand",
+"polytonic", 0};
+
+/**
+ * The LyX (.lyx file) language names for the entries of
+ * polyglossia_languages, position by position; e.g. "bahasai" is written
+ * as "bahasa" and "usorbian" as "uppersorbian".
+ * Please keep this in sync with polyglossia_languages line by line!
+ * The list is terminated by a 0 entry.
+ */
+const char * const coded_polyglossia_languages[] = {
+"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
+"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil",
+"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
+"irish", "portuges", "thai", "bahasa", "english", "italian", "romanian", "turkish",
+"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
+"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian",
+"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french",
+"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
+"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek",
+"marathi", "spanish",
+"american", "ancientgreek", "australian", "british", "greek", "newzealand",
+"polutonikogreek", 0};
+
+/**
+ * Supported CJK encodings (the first argument of a CJK environment).
+ * The list is terminated by a 0 entry. A CJK environment whose encoding
+ * is not listed here is output as ERT by parse_environment().
+ * Each entry has its LyX language name at the same position in
+ * coded_supported_CJK_encodings.
+ */
+const char * const supported_CJK_encodings[] = {
+"EUC-JP", "KS", "GB", "UTF8", 0};
+
+/**
+ * The LyX language names corresponding to supported_CJK_encodings,
+ * position by position; CJK2lyx() uses this list to translate an
+ * encoding name. The list is terminated by a 0 entry.
+ * Please keep this in sync with supported_CJK_encodings line by line!
+ */
+const char * const coded_supported_CJK_encodings[] = {
+"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0};
+
+string CJK2lyx(string const & encoding)
+{      /* Translate a CJK encoding name into the LyX language name stored at
+        * the same position of coded_supported_CJK_encodings. An encoding
+        * that is not found in supported_CJK_encodings is returned unchanged. */
+       char const * const * where = is_known(encoding, supported_CJK_encodings);
+       if (where)      // non-null: its offset into the table is the shared index
+               return coded_supported_CJK_encodings[where - supported_CJK_encodings];
+       return encoding;
+}
+
 /*!
  * natbib commands.
  * The starred forms are also known except for "citefullauthor",
@@ -456,11 +512,14 @@ docstring convert_unicodesymbols(docstring s)
                        continue;
                }
                s = s.substr(i);
+               bool termination;
                docstring rem;
                set<string> req;
                docstring parsed = encodings.fromLaTeXCommand(s,
-                               Encodings::TEXT_CMD, rem, &req);
-               for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+                               Encodings::TEXT_CMD, termination, rem, &req);
+               set<string>::const_iterator it = req.begin();
+               set<string>::const_iterator en = req.end();
+               for (; it != en; ++it)
                        preamble.registerAutomaticallyLoadedPackage(*it);
                os << parsed;
                s = rem;
@@ -1118,16 +1177,28 @@ void parse_outer_box(Parser & p, ostream & os, unsigned flags, bool outer,
 }
 
 
-void parse_listings(Parser & p, ostream & os, Context & parent_context)
+void parse_listings(Parser & p, ostream & os, Context & parent_context, bool in_line)
 {
        parent_context.check_layout(os);
        begin_inset(os, "listings\n");
-       os << "inline false\n"
-          << "status collapsed\n";
+       if (p.hasOpt()) {
+               string arg = p.verbatimOption();
+               os << "lstparams " << '"' << arg << '"' << '\n';
+       }
+       if (in_line)
+               os << "inline true\n";
+       else
+               os << "inline false\n";
+       os << "status collapsed\n";
        Context context(true, parent_context.textclass);
        context.layout = &parent_context.textclass.plainLayout();
-       context.check_layout(os);
-       string const s = p.verbatimEnvironment("lstlisting");
+       string s;
+       if (in_line) {
+               s = p.plainCommand('!', '!', "lstinline");
+               context.new_paragraph(os);
+               context.check_layout(os);
+       } else
+               s = p.plainEnvironment("lstlisting");
        for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
                if (*it == '\\')
                        os << "\n\\backslash\n";
@@ -1173,8 +1244,7 @@ void parse_unknown_environment(Parser & p, string const & name, ostream & os,
 
 
 void parse_environment(Parser & p, ostream & os, bool outer,
-                       string & last_env, bool & title_layout_found,
-                       Context & parent_context)
+                       string & last_env, Context & parent_context)
 {
        Layout const * newlayout;
        InsetLayout const * newinsetlayout = 0;
@@ -1198,6 +1268,23 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                }
        }
 
+       else if (is_known(name, polyglossia_languages)) {
+               // We must begin a new paragraph if not already done
+               if (! parent_context.atParagraphStart()) {
+                       parent_context.check_end_layout(os);
+                       parent_context.new_paragraph(os);
+               }
+               // save the language in the context so that it is
+               // handled by parse_text
+               parent_context.font.language = polyglossia2lyx(name);
+               parse_text(p, os, FLAG_END, outer, parent_context);
+               // Just in case the environment is empty
+               parent_context.extra_stuff.erase();
+               // We must begin a new paragraph to reset the language
+               parent_context.new_paragraph(os);
+               p.skip_spaces();
+       }
+
        else if (unstarred_name == "tabular" || name == "longtable") {
                eat_whitespace(p, os, parent_context, false);
                string width = "0pt";
@@ -1367,6 +1454,57 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                os << "\n\\begin_layout Standard\n";
        }
 
+       else if (name == "CJK") {
+               // the scheme is \begin{CJK}{encoding}{mapping}text\end{CJK}
+               // It is impossible to decide if a CJK environment was in its own paragraph or within
+               // a line. We therefore always assume a paragraph since the latter is a rare case.
+               eat_whitespace(p, os, parent_context, false);
+               parent_context.check_end_layout(os);
+               // store the encoding to be able to reset it
+               string const encoding_old = p.encoding_latex_;
+               string const encoding = p.getArg('{', '}');
+               // SJIS and BIG5 don't work with LaTeX according to the comment in unicode.cpp
+               // JIS does not work with LyX's encoding conversion
+               if (encoding != "SJIS" && encoding != "BIG5" && encoding != "JIS")
+                       p.setEncoding(encoding);
+               else
+                       p.setEncoding("utf8");
+               // LyX doesn't support the second argument so if
+               // this is used we need to output everything as ERT
+               string const mapping = p.getArg('{', '}');
+               if ( (!mapping.empty() && mapping != " ")
+                       || (!is_known(encoding, supported_CJK_encodings))) {
+                       parent_context.check_layout(os);
+                       handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}",
+                                      parent_context);
+                       // we must parse the content as verbatim because e.g. SJIS can contain
+                       // normally invalid characters
+                       string const s = p.plainEnvironment("CJK");
+                       string::const_iterator it2 = s.begin();
+                       for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+                               if (*it == '\\')
+                                       handle_ert(os, "\\", parent_context);
+                               else if (*it == '$')
+                                       handle_ert(os, "$", parent_context);
+                               else 
+                                       os << *it;
+                       }
+                       p.skip_spaces();
+                       handle_ert(os, "\\end{" + name + "}",
+                                      parent_context);
+               } else {
+                       string const lang = CJK2lyx(encoding);
+                       // store the language because we must reset it at the end
+                       string const lang_old = parent_context.font.language;
+                       parent_context.font.language = lang;
+                       parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
+                       parent_context.font.language = lang_old;
+                       parent_context.new_paragraph(os);
+               }
+               p.encoding_latex_ = encoding_old;
+               p.skip_spaces();
+       }
+
        else if (name == "lyxgreyedout") {
                eat_whitespace(p, os, parent_context, false);
                parent_context.check_layout(os);
@@ -1387,14 +1525,9 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 
        else if (name == "lstlisting") {
                eat_whitespace(p, os, parent_context, false);
-               // FIXME handle listings with parameters
-               //       If this is added, don't forgot to handle the
-               //       automatic color package loading
-               if (p.hasOpt())
-                       parse_unknown_environment(p, name, os, FLAG_END,
-                                                 outer, parent_context);
-               else
-                       parse_listings(p, os, parent_context);
+               // FIXME handle the automatic color package loading
+               // uwestoehr asks: In which case is the color package loaded?
+               parse_listings(p, os, parent_context, false);
                p.skip_spaces();
        }
 
@@ -1546,10 +1679,12 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                context.check_end_deeper(os);
                parent_context.new_paragraph(os);
                p.skip_spaces();
-               if (!title_layout_found)
-                       title_layout_found = newlayout->intitle;
+               if (!preamble.titleLayoutFound())
+                       preamble.titleLayoutFound(newlayout->intitle);
                set<string> const & req = newlayout->requires();
-               for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+               set<string>::const_iterator it = req.begin();
+               set<string>::const_iterator en = req.end();
+               for (; it != en; ++it)
                        preamble.registerAutomaticallyLoadedPackage(*it);
        }
 
@@ -1963,10 +2098,46 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
        bool const use_natbib = preamble.isPackageUsed("natbib");
        bool const use_jurabib = preamble.isPackageUsed("jurabib");
        string last_env;
-       bool title_layout_found = false;
        while (p.good()) {
                Token const & t = p.get_token();
 
+       // it is impossible to determine the correct document language if CJK is used.
+       // Therefore write a note at the beginning of the document
+       if (have_CJK) {
+               context.check_layout(os);
+               begin_inset(os, "Note Note\n");
+               os << "status open\n\\begin_layout Plain Layout\n"
+                  << "\\series bold\n"
+                  << "Important information:\n"
+                  << "\\end_layout\n\n"
+                  << "\\begin_layout Plain Layout\n"
+                  << "This document contains text in Chinese, Japanese or Korean.\n"
+                  << " It was therefore impossible for tex2lyx to set the correct document langue for your document."
+                  << " Please set in the document settings by yourself!\n"
+                  << "\\end_layout\n";
+               end_inset(os);
+               have_CJK = false;
+       }
+
+       // it is impossible to determine the correct encoding for non-CJK Japanese.
+       // Therefore write a note at the beginning of the document
+       if (is_nonCJKJapanese) {
+               context.check_layout(os);
+               begin_inset(os, "Note Note\n");
+               os << "status open\n\\begin_layout Plain Layout\n"
+                  << "\\series bold\n"
+                  << "Important information:\n"
+                  << "\\end_layout\n\n"
+                  << "\\begin_layout Plain Layout\n"
+                  << "This document is in Japanese (non-CJK).\n"
+                  << " It was therefore impossible for tex2lyx to determine the correct encoding."
+                  << " The encoding EUC-JP was assumed. If this is incorrect, please set the correct"
+                  << " encoding in the document settings.\n"
+                  << "\\end_layout\n";
+               end_inset(os);
+               is_nonCJKJapanese = false;
+       }
+
 #ifdef FILEDEBUG
                debugToken(cerr, t, flags);
 #endif
@@ -2290,7 +2461,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 
                else if (t.cs() == "begin")
                        parse_environment(p, os, outer, last_env,
-                                         title_layout_found, context);
+                                         context);
 
                else if (t.cs() == "end") {
                        if (flags & FLAG_END) {
@@ -2461,10 +2632,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 
                // Must catch empty dates before findLayout is called below
                else if (t.cs() == "date") {
+                       eat_whitespace(p, os, context, false);
+                       p.pushPosition();
                        string const date = p.verbatim_item();
-                       if (date.empty())
+                       p.popPosition();
+                       if (date.empty()) {
                                preamble.suppressDate(true);
-                       else {
+                               p.verbatim_item();
+                       } else {
                                preamble.suppressDate(false);
                                if (context.new_layout_allowed &&
                                    (newlayout = findLayout(context.textclass,
@@ -2472,16 +2647,18 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                        // write the layout
                                        output_command_layout(os, p, outer,
                                                        context, newlayout);
-                                       p.skip_spaces();
-                                       if (!title_layout_found)
-                                               title_layout_found = newlayout->intitle;
+                                       parse_text_snippet(p, os, FLAG_ITEM, outer, context);
+                                       if (!preamble.titleLayoutFound())
+                                               preamble.titleLayoutFound(newlayout->intitle);
                                        set<string> const & req = newlayout->requires();
-                                       for (set<string>::const_iterator it = req.begin();
-                                            it != req.end(); it++)
+                                       set<string>::const_iterator it = req.begin();
+                                       set<string>::const_iterator en = req.end();
+                                       for (; it != en; ++it)
                                                preamble.registerAutomaticallyLoadedPackage(*it);
                                } else
-                                       handle_ert(os, "\\date{" + date + '}',
-                                                       context);
+                                       handle_ert(os,
+                                               "\\date{" + p.verbatim_item() + '}',
+                                               context);
                        }
                }
 
@@ -2494,10 +2671,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                        p.get_token();
                        output_command_layout(os, p, outer, context, newlayout);
                        p.skip_spaces();
-                       if (!title_layout_found)
-                               title_layout_found = newlayout->intitle;
+                       if (!preamble.titleLayoutFound())
+                               preamble.titleLayoutFound(newlayout->intitle);
                        set<string> const & req = newlayout->requires();
-                       for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+                       for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
                                preamble.registerAutomaticallyLoadedPackage(*it);
                }
 
@@ -2507,10 +2684,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                        // write the layout
                        output_command_layout(os, p, outer, context, newlayout);
                        p.skip_spaces();
-                       if (!title_layout_found)
-                               title_layout_found = newlayout->intitle;
+                       if (!preamble.titleLayoutFound())
+                               preamble.titleLayoutFound(newlayout->intitle);
                        set<string> const & req = newlayout->requires();
-                       for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+                       for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
                                preamble.registerAutomaticallyLoadedPackage(*it);
                }
 
@@ -2782,6 +2959,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                        end_inset(os);
                }
 
+               else if (t.cs() == "lstinline") {
+                       p.skip_spaces();
+                       parse_listings(p, os, context, true);
+               }
+
                else if (t.cs() == "ensuremath") {
                        p.skip_spaces();
                        context.check_layout(os);
@@ -2795,18 +2977,20 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                }
 
                else if (t.cs() == "makeindex" || t.cs() == "maketitle") {
-                       if (title_layout_found) {
+                       if (preamble.titleLayoutFound()) {
                                // swallow this
                                skip_spaces_braces(p);
                        } else
                                handle_ert(os, t.asInput(), context);
                }
 
-               else if (t.cs() == "tableofcontents") {
+               else if (t.cs() == "tableofcontents" || t.cs() == "lstlistoflistings") {
                        context.check_layout(os);
-                       begin_command_inset(os, "toc", "tableofcontents");
+                       begin_command_inset(os, "toc", t.cs());
                        end_inset(os);
                        skip_spaces_braces(p);
+                       if (t.cs() == "lstlistoflistings")
+                               preamble.registerAutomaticallyLoadedPackage("listings");
                }
 
                else if (t.cs() == "listoffigures") {
@@ -3378,7 +3562,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                        context.check_layout(os);
                        // save the language for the case that a
                        // \foreignlanguage is used
-
                        context.font.language = babel2lyx(p.verbatim_item());
                        os << "\n\\lang " << context.font.language << "\n";
                }
@@ -3389,6 +3572,38 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                              context, "\\lang",
                                              context.font.language, lang);
                }
+               
+               else if (is_known(t.cs().substr(4, string::npos), polyglossia_languages)) {
+                       // scheme is \textLANGUAGE{text} where LANGUAGE is in polyglossia_languages[]
+                       string lang;
+                       // We have to output the whole command if it has an option
+                       // because LyX doesn't support this yet, see bug #8214,
+                       // only if there is a single option specifying a variant, we can handle it.
+                       if (p.hasOpt()) {
+                               string langopts = p.getOpt();
+                               // check if the option contains a variant, if yes, extract it
+                               string::size_type pos_var = langopts.find("variant");
+                               string::size_type i = langopts.find(',');
+                               if (pos_var != string::npos){
+                                       string variant;
+                                       if (i == string::npos) {
+                                               variant = langopts.substr(pos_var + 8, langopts.length() - pos_var - 9);
+                                               lang = polyglossia2lyx(variant);
+                                               parse_text_attributes(p, os, FLAG_ITEM, outer,
+                                                                         context, "\\lang",
+                                                                         context.font.language, lang);
+                                       }
+                                       else
+                                               handle_ert(os, t.asInput() + langopts, context);
+                               } else
+                                       handle_ert(os, t.asInput() + langopts, context);
+                       } else {
+                               lang = polyglossia2lyx(t.cs().substr(4, string::npos));
+                               parse_text_attributes(p, os, FLAG_ITEM, outer,
+                                                         context, "\\lang",
+                                                         context.font.language, lang);
+                       }
+               }
 
                else if (t.cs() == "inputencoding") {
                        // nothing to write here
@@ -3488,20 +3703,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         && contains("\"'.=^`bcdHkrtuv~", t.cs())) {
                        context.check_layout(os);
                        // try to see whether the string is in unicodesymbols
+                       bool termination;
                        docstring rem;
                        string command = t.asInput() + "{"
                                + trimSpaceAndEol(p.verbatim_item())
                                + "}";
                        set<string> req;
                        docstring s = encodings.fromLaTeXCommand(from_utf8(command),
-                               Encodings::TEXT_CMD | Encodings::MATH_CMD, rem, &req);
+                               Encodings::TEXT_CMD | Encodings::MATH_CMD,
+                               termination, rem, &req);
                        if (!s.empty()) {
                                if (!rem.empty())
                                        cerr << "When parsing " << command
                                             << ", result is " << to_utf8(s)
                                             << "+" << to_utf8(rem) << endl;
                                os << to_utf8(s);
-                               for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+                               for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
                                        preamble.registerAutomaticallyLoadedPackage(*it);
                        } else
                                // we did not find a non-ert version
@@ -4094,10 +4311,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                        // try to see whether the string is in unicodesymbols
                        // Only use text mode commands, since we are in text mode here,
                        // and math commands may be invalid (bug 6797)
+                       bool termination;
                        docstring rem;
                        set<string> req;
                        docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()),
-                                                                Encodings::TEXT_CMD, rem, &req);
+                                       Encodings::TEXT_CMD, termination, rem, &req);
                        if (!s.empty()) {
                                if (!rem.empty())
                                        cerr << "When parsing " << t.cs()
@@ -4105,8 +4323,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                             << "+" << to_utf8(rem) << endl;
                                context.check_layout(os);
                                os << to_utf8(s);
-                               skip_spaces_braces(p);
-                               for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+                               if (termination)
+                                       skip_spaces_braces(p);
+                               for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
                                        preamble.registerAutomaticallyLoadedPackage(*it);
                        }
                        //cerr << "#: " << t << " mode: " << mode << endl;