Cmake build: tex2lyx

[lyx.git] / src / tex2lyx / text.cpp
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp

index db9f9b5adc7be4b10020a6b7453a2bbdcc40c6d7..f406869153eddb30f1ca4fdf8bab2a04ecc4dfb4 100644 (file)
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -130,17 +130,9 @@ const char * const supported_CJK_encodings[] = {
   * the same as supported_CJK_encodings with their corresponding LyX language name
   * please keep this in sync with supported_CJK_encodings line by line!
   */
-const char * const coded_supported_CJK_encodings[] = {
+const char * const supported_CJK_languages[] = {
  "japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0};
  
-string CJK2lyx(string const & encoding)
-{
-       char const * const * where = is_known(encoding, supported_CJK_encodings);
-       if (where)
-               return coded_supported_CJK_encodings[where - supported_CJK_encodings];
-       return encoding;
-}
-
  /*!
   * natbib commands.
   * The starred forms are also known except for "citefullauthor",
@@ -636,27 +628,40 @@ void output_command_layout(ostream & os, Parser & p, bool outer,
         }
         context.check_deeper(os);
         context.check_layout(os);
-       unsigned int optargs = 0;
-       while (optargs < context.layout->optargs) {
+       // FIXME: Adjust to format 446!
+       // Since format 446, layouts no longer require all optional
+       // arguments to precede the required ones. Needs to be implemented!
+       int optargs = 0;
+       while (optargs < context.layout->optArgs()) {
                 eat_whitespace(p, os, context, false);
                 if (p.next_token().cat() == catEscape ||
                     p.next_token().character() != '[')
                         break;
                 p.get_token(); // eat '['
-               begin_inset(os, "Argument\n");
+               // FIXME: Just a workaround. InsetArgument::updateBuffer
+               //        will compute a proper ID for all "999" Arguments
+               //        (which is also what lyx2lyx produces).
+               //        However, tex2lyx should be able to output proper IDs
+               //        itself.
+               begin_inset(os, "Argument 999\n");
                 os << "status collapsed\n\n";
                 parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
                 end_inset(os);
                 eat_whitespace(p, os, context, false);
                 ++optargs;
         }
-       unsigned int reqargs = 0;
-       while (reqargs < context.layout->reqargs) {
+       int reqargs = 0;
+       while (reqargs < context.layout->requiredArgs()) {
                 eat_whitespace(p, os, context, false);
                 if (p.next_token().cat() != catBegin)
                         break;
                 p.get_token(); // eat '{'
-               begin_inset(os, "Argument\n");
+               // FIXME: Just a workaround. InsetArgument::updateBuffer
+               //        will compute a proper ID for all "999" Arguments
+               //        (which is also what lyx2lyx produces).
+               //        However, tex2lyx should be able to output proper IDs
+               //        itself.
+               begin_inset(os, "Argument 999\n");
                 os << "status collapsed\n\n";
                 parse_text_in_inset(p, os, FLAG_BRACE_LAST, outer, context);
                 end_inset(os);
@@ -822,8 +827,18 @@ void parse_box(Parser & p, ostream & os, unsigned outer_flags,
                         if (inner_type != "makebox") {
                                 latex_height = p.getArg('[', ']');
                                 translate_box_len(latex_height, height_value, height_unit, height_special);
-                       } else
-                               hor_pos = p.getArg('[', ']');
+                       } else {
+                               string const opt = p.getArg('[', ']');
+                               if (!opt.empty()) {
+                                       hor_pos = opt;
+                                       if (hor_pos != "l" && hor_pos != "c" &&
+                                           hor_pos != "r" && hor_pos != "s") {
+                                               cerr << "invalid hor_pos " << hor_pos
+                                                    << " for " << inner_type << endl;
+                                               hor_pos = "c";
+                                       }
+                               }
+                       }
  
                         if (p.hasOpt()) {
                                 inner_pos = p.getArg('[', ']');
@@ -847,7 +862,7 @@ void parse_box(Parser & p, ostream & os, unsigned outer_flags,
                         if (!opt.empty()) {
                                 hor_pos = opt;
                                 if (hor_pos != "l" && hor_pos != "c" &&
-                                   hor_pos != "r") {
+                                   hor_pos != "r" && hor_pos != "s") {
                                         cerr << "invalid hor_pos " << hor_pos
                                              << " for " << outer_type << endl;
                                         hor_pos = "c";
@@ -1152,6 +1167,8 @@ void parse_listings(Parser & p, ostream & os, Context & parent_context, bool in_
         if (p.hasOpt()) {
                 string arg = p.verbatimOption();
                 os << "lstparams " << '"' << arg << '"' << '\n';
+               if (arg.find("\\color") != string::npos)
+                       preamble.registerAutomaticallyLoadedPackage("color");
         }
         if (in_line)
                 os << "inline true\n";
@@ -1413,7 +1430,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                 // This hack must be removed once bug 8049 is fixed!
                                 if ((it + 1 != et) && (it + 2 != et || *it2 != '\n'))
                                         os << "\n\\end_layout\n\\begin_layout Verbatim\n";
-                       } else 
+                       } else
                                 os << *it;
                 }
                 os << "\n\\end_layout\n\n";
@@ -1440,8 +1457,9 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                 // LyX doesn't support the second argument so if
                 // this is used we need to output everything as ERT
                 string const mapping = p.getArg('{', '}');
-               if ((!mapping.empty() && mapping != " ")
-                       || (!is_known(encoding, supported_CJK_encodings))) {
+               char const * const * const where =
+                       is_known(encoding, supported_CJK_encodings);
+               if ((!mapping.empty() && mapping != " ") || !where) {
                         parent_context.check_layout(os);
                         handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}",
                                        parent_context);
@@ -1453,13 +1471,14 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                         handle_ert(os, "\\", parent_context);
                                 else if (*it == '$')
                                         handle_ert(os, "$", parent_context);
-                               else 
+                               else
                                         os << *it;
                         }
                         handle_ert(os, "\\end{" + name + "}",
                                        parent_context);
                 } else {
-                       string const lang = CJK2lyx(encoding);
+                       string const lang =
+                               supported_CJK_languages[where - supported_CJK_encodings];
                         // store the language because we must reset it at the end
                         string const lang_old = parent_context.font.language;
                         parent_context.font.language = lang;
@@ -1491,8 +1510,6 @@ void parse_environment(Parser & p, ostream & os, bool outer,
  
         else if (name == "lstlisting") {
                 eat_whitespace(p, os, parent_context, false);
-               // FIXME handle the automatic color package loading
-               // uwestoehr asks: In what case color is loaded?
                 parse_listings(p, os, parent_context, false);
                 p.skip_spaces();
         }
@@ -1593,14 +1610,15 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                 }
                 context.check_deeper(os);
                 // handle known optional and required arguments
-               // layouts require all optional arguments before the required ones
+               // FIXME: Since format 446, layouts no longer require all optional
+               // arguments to precede the required ones. Needs to be implemented!
                 // Unfortunately LyX can't handle arguments of list arguments (bug 7468):
                 // It is impossible to place anything after the environment name,
                 // but before the first \\item.
                 if (context.layout->latextype == LATEX_ENVIRONMENT) {
                         bool need_layout = true;
-                       unsigned int optargs = 0;
-                       while (optargs < context.layout->optargs) {
+                       int optargs = 0;
+                       while (optargs < context.layout->optArgs()) {
                                 eat_whitespace(p, os, context, false);
                                 if (p.next_token().cat() == catEscape ||
                                     p.next_token().character() != '[')
@@ -1610,15 +1628,20 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                         context.check_layout(os);
                                         need_layout = false;
                                 }
-                               begin_inset(os, "Argument\n");
+                               // FIXME: Just a workaround. InsetArgument::updateBuffer
+                               //        will compute a proper ID for all "999" Arguments
+                               //        (which is also what lyx2lyx produces).
+                               //        However, tex2lyx should be able to output proper IDs
+                               //        itself.
+                               begin_inset(os, "Argument 999\n");
                                 os << "status collapsed\n\n";
                                 parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
                                 end_inset(os);
                                 eat_whitespace(p, os, context, false);
                                 ++optargs;
                         }
-                       unsigned int reqargs = 0;
-                       while (reqargs < context.layout->reqargs) {
+                       int reqargs = 0;
+                       while (reqargs < context.layout->requiredArgs()) {
                                 eat_whitespace(p, os, context, false);
                                 if (p.next_token().cat() != catBegin)
                                         break;
@@ -1627,7 +1650,12 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                         context.check_layout(os);
                                         need_layout = false;
                                 }
-                               begin_inset(os, "Argument\n");
+                               // FIXME: Just a workaround. InsetArgument::updateBuffer
+                               //        will compute a proper ID for all "999" Arguments
+                               //        (which is also what lyx2lyx produces).
+                               //        However, tex2lyx should be able to output proper IDs
+                               //        itself.
+                               begin_inset(os, "Argument 999\n");
                                 os << "status collapsed\n\n";
                                 parse_text_in_inset(p, os, FLAG_BRACE_LAST, outer, context);
                                 end_inset(os);
@@ -1842,28 +1870,31 @@ void fix_child_filename(string & name)
         if (!isabs)
                 name = makeAbsPath(name, absMasterTeX).absFileName();
         bool copyfile = copyFiles();
-       // convert from absolute original path to "relative to master file"
-       string const rel = to_utf8(makeRelPath(from_utf8(name),
-                                              from_utf8(absMasterTeX)));
-       // Do not copy if the file is not in or below the directory of the
-       // master, since in this case the new path might be impossible to
-       // create. Example:
-       // absMasterTeX = "/foo/bar/"
-       // absMasterLyX = "/bar/"
-       // name = "/baz.eps" => new absolute name would be "/../baz.eps"
-       if (copyfile && rel.substr(0, 3) == "../")
-               copyfile = false;
         string const absParentLyX = getParentFilePath(false);
+       string abs = name;
         if (copyfile) {
+               // convert from absolute original path to "relative to master file"
+               string const rel = to_utf8(makeRelPath(from_utf8(name),
+                                                      from_utf8(absMasterTeX)));
                 // re-interpret "relative to .tex file" as "relative to .lyx file"
                 // (is different if the master .lyx file resides in a
                 // different path than the master .tex file)
                 string const absMasterLyX = getMasterFilePath(false);
-               name = makeAbsPath(rel, absMasterLyX).absFileName();
-               if (!isabs) {
+               abs = makeAbsPath(rel, absMasterLyX).absFileName();
+               // Do not copy if the new path is impossible to create. Example:
+               // absMasterTeX = "/foo/bar/"
+               // absMasterLyX = "/bar/"
+               // name = "/baz.eps" => new absolute name would be "/../baz.eps"
+               if (contains(name, "/../"))
+                       copyfile = false;
+       }
+       if (copyfile) {
+               if (isabs)
+                       name = abs;
+               else {
                         // convert from absolute original path to
                         // "relative to .lyx file"
-                       name = to_utf8(makeRelPath(from_utf8(name),
+                       name = to_utf8(makeRelPath(from_utf8(abs),
                                                    from_utf8(absParentLyX)));
                 }
         }
@@ -1886,12 +1917,6 @@ void copy_file(FileName const & src, string dstname)
         else
                 dst = makeAbsPath(dstname, absParent);
         string const absMaster = getMasterFilePath(false);
-       string const rel = to_utf8(makeRelPath(from_utf8(dst.absFileName()),
-                                              from_utf8(absMaster)));
-       // Do not copy if the file is not in or below the directory of the
-       // master (see above)
-       if (rel.substr(0, 3) == "../")
-               return;
         FileName const srcpath = src.onlyPath();
         FileName const dstpath = dst.onlyPath();
         if (equivalent(srcpath, dstpath))
@@ -1919,27 +1944,27 @@ void copy_file(FileName const & src, string dstname)
  }
  
  
-/// Parse a NoWeb Scrap section. The initial "<<" is already parsed.
+/// Parse a NoWeb Chunk section. The initial "<<" is already parsed.
  void parse_noweb(Parser & p, ostream & os, Context & context)
  {
         // assemble the rest of the keyword
         string name("<<");
-       bool scrap = false;
+       bool chunk = false;
         while (p.good()) {
                 Token const & t = p.get_token();
                 if (t.asInput() == ">" && p.next_token().asInput() == ">") {
                         name += ">>";
                         p.get_token();
-                       scrap = (p.good() && p.next_token().asInput() == "=");
-                       if (scrap)
+                       chunk = (p.good() && p.next_token().asInput() == "=");
+                       if (chunk)
                                 name += p.get_token().asInput();
                         break;
                 }
                 name += t.asInput();
         }
  
-       if (!scrap || !context.new_layout_allowed ||
-           !context.textclass.hasLayout(from_ascii("Scrap"))) {
+       if (!chunk || !context.new_layout_allowed ||
+           !context.textclass.hasLayout(from_ascii("Chunk"))) {
                 cerr << "Warning: Could not interpret '" << name
                      << "'. Ignoring it." << endl;
                 return;
@@ -1953,7 +1978,7 @@ void parse_noweb(Parser & p, ostream & os, Context & context)
         // always must be in an own paragraph.
         context.new_paragraph(os);
         Context newcontext(true, context.textclass,
-               &context.textclass[from_ascii("Scrap")]);
+               &context.textclass[from_ascii("Chunk")]);
         newcontext.check_layout(os);
         os << name;
         while (p.good()) {
@@ -1965,12 +1990,12 @@ void parse_noweb(Parser & p, ostream & os, Context & context)
                 else {
                         ostringstream oss;
                         Context tmp(false, context.textclass,
-                                   &context.textclass[from_ascii("Scrap")]);
+                                   &context.textclass[from_ascii("Chunk")]);
                         tmp.need_end_layout = true;
                         tmp.check_layout(oss);
                         os << subst(t.asInput(), "\n", oss.str());
                 }
-               // The scrap chunk is ended by an @ at the beginning of a line.
+               // The chunk is ended by an @ at the beginning of a line.
                 // After the @ the line may contain a comment and/or
                 // whitespace, but nothing else.
                 if (t.asInput() == "@" && p.prev_token().cat() == catNewline &&
@@ -2142,24 +2167,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
         while (p.good()) {
                 Token const & t = p.get_token();
  
-       // it is impossible to determine the correct document language if CJK is used.
-       // Therefore write a note at the beginning of the document
-       if (have_CJK) {
-               context.check_layout(os);
-               begin_inset(os, "Note Note\n");
-               os << "status open\n\\begin_layout Plain Layout\n"
-                  << "\\series bold\n"
-                  << "Important information:\n"
-                  << "\\end_layout\n\n"
-                  << "\\begin_layout Plain Layout\n"
-                  << "This document contains text in Chinese, Japanese or Korean.\n"
-                  << " It was therefore impossible for tex2lyx to set the correct document langue for your document."
-                  << " Please set the language manually in the document settings.\n"
-                  << "\\end_layout\n";
-               end_inset(os);
-               have_CJK = false;
-       }
-
         // it is impossible to determine the correct encoding for non-CJK Japanese.
         // Therefore write a note at the beginning of the document
         if (is_nonCJKJapanese) {
@@ -2275,6 +2282,12 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         skip_braces(p);
                 }
  
+               else if (t.asInput() == "<"
+                        && p.next_token().asInput() == "<" && noweb_mode) {
+                       p.get_token();
+                       parse_noweb(p, os, context);
+               }
+
                 else if (t.asInput() == "<" && p.next_token().asInput() == "<") {
                         context.check_layout(os);
                         begin_inset(os, "Quotes ");
@@ -2284,12 +2297,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         skip_braces(p);
                 }
  
-               else if (t.asInput() == "<"
-                        && p.next_token().asInput() == "<" && noweb_mode) {
-                       p.get_token();
-                       parse_noweb(p, os, context);
-               }
-
                 else if (t.cat() == catSpace || (t.cat() == catNewline && ! p.isParagraph()))
                         check_space(p, os, context);
  
@@ -2739,13 +2746,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         context.check_layout(os);
                         p.skip_spaces();
                         begin_inset(os, "Caption\n");
-                       Context newcontext(true, context.textclass);
-                       newcontext.font = context.font;
+                       Context newcontext(true, context.textclass, 0, 0, context.font);
                         newcontext.check_layout(os);
+                       // FIXME InsetArgument is now properly implemented in InsetLayout
+                       //       (for captions, but also for others)
                         if (p.next_token().cat() != catEscape &&
                             p.next_token().character() == '[') {
                                 p.get_token(); // eat '['
-                               begin_inset(os, "Argument\n");
+                               begin_inset(os, "Argument 1\n");
                                 os << "status collapsed\n";
                                 parse_text_in_inset(p, os, FLAG_BRACK_LAST, outer, context);
                                 end_inset(os);
@@ -2790,8 +2798,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                         os << "\n\\begin_layout Plain Layout";
                                         p.skip_spaces();
                                         begin_inset(os, "Caption\n");
-                                       Context newcontext(true, context.textclass);
-                                       newcontext.font = context.font;
+                                       Context newcontext(true, context.textclass,
+                                                          0, 0, context.font);
                                         newcontext.check_layout(os);
                                         os << caption << "\n";
                                         newcontext.check_end_layout(os);
@@ -3210,8 +3218,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
  
                 else if (t.cs() == "href") {
                         context.check_layout(os);
-                       string target = p.getArg('{', '}');
-                       string name = p.getArg('{', '}');
+                       string target = convert_command_inset_arg(p.verbatim_item());
+                       string name = convert_command_inset_arg(p.verbatim_item());
                         string type;
                         size_t i = target.find(':');
                         if (i != string::npos) {
@@ -3617,8 +3625,8 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                               context, "\\lang",
                                               context.font.language, lang);
                 }
-               
-               else if (prefixIs(t.cs(), "text") 
+
+               else if (prefixIs(t.cs(), "text")
                          && is_known(t.cs().substr(4), preamble.polyglossia_languages)) {
                         // scheme is \textLANGUAGE{text} where LANGUAGE is in polyglossia_languages[]
                         string lang;
@@ -3729,7 +3737,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                 else if (t.cs() == "verb") {
                         context.check_layout(os);
                         char const delimiter = p.next_token().character();
-                       string const arg = p.getArg(delimiter, delimiter);
+                       // \verb is special: The usual escaping rules do not
+                       // apply, e.g. "\verb+\+" is valid and denotes a single
+                       // backslash (bug #4468). Therefore we do not allow
+                       // escaping in getArg().
+                       string const arg = p.getArg(delimiter, delimiter, false);
                         ostringstream oss;
                         oss << "\\verb" << delimiter << arg << delimiter;
                         handle_ert(os, oss.str(), context);
@@ -3828,8 +3840,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                 string const abslyxname = makeAbsPath(
                                         lyxname, getParentFilePath(false)).absFileName();
                                 bool xfig = false;
-                               external = FileName(absfigname).exists();
-                               if (t.cs() == "input") {
+                               if (!skipChildren())
+                                       external = FileName(absfigname).exists();
+                               if (t.cs() == "input" && !skipChildren()) {
                                         string const ext = getExtension(abstexname);
  
                                         // Combined PS/LaTeX:
@@ -3881,6 +3894,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                         FileName abssrc(abstexname);
                                         copy_file(abssrc, outname);
                                 } else if (t.cs() != "verbatiminput" &&
+                                          !skipChildren() &&
                                     tex2lyx(abstexname, FileName(abslyxname),
                                             p.getEncoding())) {
                                         outname = lyxname;
@@ -4396,7 +4410,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                         /*
                         string s = t.asInput();
                         string z = p.verbatim_item();
-                       while (p.good() && z != " " && z.size()) {
+                       while (p.good() && z != " " && !z.empty()) {
                                 //cerr << "read: " << z << endl;
                                 s += z;
                                 z = p.verbatim_item();
@@ -4423,6 +4437,79 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
         }
  }
  
+
+string guessLanguage(Parser & p, string const & lang)
+{
+       typedef std::map<std::string, size_t> LangMap;
+       // map from language names to number of characters
+       LangMap used;
+       used[lang] = 0;
+       for (char const * const * i = supported_CJK_languages; *i; i++)
+               used[string(*i)] = 0;
+
+       while (p.good()) {
+               Token const t = p.get_token();
+               // comments are not counted for any language
+               if (t.cat() == catComment)
+                       continue;
+               // commands are not counted as well, but we need to detect
+               // \begin{CJK} and switch encoding if needed
+               if (t.cat() == catEscape) {
+                       if (t.cs() == "inputencoding") {
+                               string const enc = subst(p.verbatim_item(), "\n", " ");
+                               p.setEncoding(enc);
+                               continue;
+                       }
+                       if (t.cs() != "begin")
+                               continue;
+               } else {
+                       // Non-CJK content is counted for lang.
+                       // We do not care about the real language here:
+                       // If we have more non-CJK contents than CJK contents,
+                       // we simply use the language that was specified as
+                       // babel main language.
+                       used[lang] += t.asInput().length();
+                       continue;
+               }
+               // Now we are starting an environment
+               p.pushPosition();
+               string const name = p.getArg('{', '}');
+               if (name != "CJK") {
+                       p.popPosition();
+                       continue;
+               }
+               // It is a CJK environment
+               p.popPosition();
+               /* name = */ p.getArg('{', '}');
+               string const encoding = p.getArg('{', '}');
+               /* mapping = */ p.getArg('{', '}');
+               string const encoding_old = p.getEncoding();
+               char const * const * const where =
+                       is_known(encoding, supported_CJK_encodings);
+               if (where)
+                       p.setEncoding(encoding);
+               else
+                       p.setEncoding("utf8");
+               string const text = p.verbatimEnvironment("CJK");
+               p.setEncoding(encoding_old);
+               p.skip_spaces();
+               if (!where) {
+                       // ignore contents in unknown CJK encoding
+                       continue;
+               }
+               // the language of the text
+               string const cjk =
+                       supported_CJK_languages[where - supported_CJK_encodings];
+               used[cjk] += text.length();
+       }
+       LangMap::const_iterator use = used.begin();
+       for (LangMap::const_iterator it = used.begin(); it != used.end(); ++it) {
+               if (it->second > use->second)
+                       use = it;
+       }
+       return use->first;
+}
+
  // }])