From d3c385de20d3204c1aa4d7f41297554f09fc1881 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sun, 23 Jan 2011 21:10:20 +0000 Subject: [PATCH] Next step towards a working roundtrip of files generated by LyX: Try to recognize modules. Again, this is needed because the complete LyX preamble is ignored. It is not possible to recognize a module in all cases, but at least the simple ones are handled now. As a prerequisite I also had to revive the filling of known_environments. This has been removed (probably by accident) some time ago. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@37306 a592a061-630c-0410-9148-cb99ea01b6c8 --- development/scons/scons_manifest.py | 1 + src/tex2lyx/Makefile.am | 1 + src/tex2lyx/preamble.cpp | 49 +++++--- src/tex2lyx/tex2lyx.cpp | 185 +++++++++++++++++++++++++++- src/tex2lyx/tex2lyx.h | 54 +++++++- src/tex2lyx/text.cpp | 49 ++++---- 6 files changed, 287 insertions(+), 52 deletions(-) diff --git a/development/scons/scons_manifest.py b/development/scons/scons_manifest.py index cc1c79d8bb..7886714e36 100644 --- a/development/scons/scons_manifest.py +++ b/development/scons/scons_manifest.py @@ -628,6 +628,7 @@ src_tex2lyx_copied_files = Split(''' Floating.cpp FontInfo.cpp Layout.cpp + LayoutFile.cpp LayoutModuleList.cpp lengthcommon.cpp Lexer.cpp diff --git a/src/tex2lyx/Makefile.am b/src/tex2lyx/Makefile.am index 4f8b1ac54d..93b1c7cb9a 100644 --- a/src/tex2lyx/Makefile.am +++ b/src/tex2lyx/Makefile.am @@ -37,6 +37,7 @@ LINKED_FILES = \ ../FontInfo.cpp \ ../insets/InsetLayout.cpp \ ../Layout.cpp \ + ../LayoutFile.cpp \ ../LayoutModuleList.cpp \ ../lengthcommon.cpp \ ../Lexer.cpp \ diff --git a/src/tex2lyx/preamble.cpp b/src/tex2lyx/preamble.cpp index d8624a2ba2..e7aaf18f18 100644 --- a/src/tex2lyx/preamble.cpp +++ b/src/tex2lyx/preamble.cpp @@ -44,6 +44,7 @@ namespace lyx { extern map special_columns; map > used_packages; +const char * const modules_placeholder = "\001modules\001"; // needed to handle encodings with babel bool one_language 
= true; @@ -674,6 +675,7 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/) if (!h_options.empty()) os << "\\options " << h_options << "\n"; os << "\\use_default_options " << h_use_default_options << "\n" + << modules_placeholder << "\\language " << h_language << "\n" << "\\inputencoding " << h_inputencoding << "\n" << "\\font_roman " << h_font_roman << "\n" @@ -795,7 +797,6 @@ void parse_preamble(Parser & p, ostream & os, h_preamble << t.asInput(); else if (t.cat() == catComment) { - // regex to parse comments (currently not used) static regex const islyxfile("%% LyX .* created this file"); static regex const usercommands("User specified LaTeX commands"); @@ -834,9 +835,11 @@ void parse_preamble(Parser & p, ostream & os, p.setCatCode('@', catOther); } - else if (t.cs() == "newcommand" || t.cs() == "renewcommand" - || t.cs() == "providecommand" + else if (t.cs() == "newcommand" || t.cs() == "newcommandx" + || t.cs() == "renewcommand" || t.cs() == "renewcommandx" + || t.cs() == "providecommand" || t.cs() == "providecommandx" || t.cs() == "DeclareRobustCommand" + || t.cs() == "DeclareRobustCommandx" || t.cs() == "ProvideTextCommandDefault" || t.cs() == "DeclareMathAccent") { bool star = false; @@ -863,6 +866,10 @@ void parse_preamble(Parser & p, ostream & os, // remove leading "\" h_font_default_family = family.erase(0,1); } + + // Add the command to the known commands + add_known_command(name, opt1, !opt2.empty(), from_utf8(body)); + // only non-lyxspecific stuff if (!in_lyx_preamble) { ostringstream ss; @@ -872,9 +879,6 @@ void parse_preamble(Parser & p, ostream & os, ss << '{' << name << '}' << opt1 << opt2 << '{' << body << "}"; h_preamble << ss.str(); - - // Add the command to the known commands - add_known_command(name, opt1, !opt2.empty()); /* ostream & out = in_preamble ? h_preamble : os; out << "\\" << t.cs() << "{" << name << "}" @@ -897,7 +901,7 @@ void parse_preamble(Parser & p, ostream & os, // options. 
handle_opt(opts, known_languages, h_language); delete_opt(opts, known_languages); - + // paper orientation if ((it = find(opts.begin(), opts.end(), "landscape")) != opts.end()) { h_paperorientation = "landscape"; @@ -932,6 +936,8 @@ void parse_preamble(Parser & p, ostream & os, delete_opt(opts, known_class_paper_sizes); // the remaining options h_options = join(opts, ","); + // FIXME This does not work for classes that have a + // different name in LyX than in LaTeX h_textclass = p.getArg('{', '}'); } @@ -955,14 +961,18 @@ void parse_preamble(Parser & p, ostream & os, else if (t.cs() == "newenvironment") { string const name = p.getArg('{', '}'); - ostringstream ss; - ss << "\\newenvironment{" << name << "}"; - ss << p.getOpt(); - ss << p.getOpt(); - ss << '{' << p.verbatim_item() << '}'; - ss << '{' << p.verbatim_item() << '}'; - if (!in_lyx_preamble) - h_preamble << ss.str(); + string const opt1 = p.getOpt(); + string const opt2 = p.getOpt(); + string const beg = p.verbatim_item(); + string const end = p.verbatim_item(); + if (!in_lyx_preamble) { + h_preamble << "\\newenvironment{" << name + << '}' << opt1 << opt2 << '{' + << beg << "}{" << end << '}'; + } + add_known_environment(name, opt1, !opt2.empty(), + from_utf8(beg), from_utf8(end)); + } else if (t.cs() == "def") { @@ -1146,12 +1156,11 @@ void parse_preamble(Parser & p, ostream & os, h_textclass = forceclass; if (noweb_mode && !prefixIs(h_textclass, "literate-")) h_textclass.insert(0, "literate-"); - FileName layoutfilename = libFileSearch("layouts", h_textclass, "layout"); - if (layoutfilename.empty()) { - cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." << endl; - exit(1); + tc.setName(h_textclass); + if (!tc.load()) { + cerr << "Error: Could not read layout file for textclass \"" << h_textclass << "\"." 
<< endl; + exit(EXIT_FAILURE); } - tc.read(layoutfilename); if (h_papersides.empty()) { ostringstream ss; ss << tc.sides(); diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 5467b5e015..8f373d83ed 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -17,6 +17,9 @@ #include "Context.h" #include "Encoding.h" #include "Layout.h" +#include "LayoutFile.h" +#include "LayoutModuleList.h" +#include "ModuleList.h" #include "TextClass.h" #include "support/convert.h" @@ -135,13 +138,18 @@ string active_environment() } +TeX2LyXDocClass textclass; CommandMap known_commands; CommandMap known_environments; CommandMap known_math_environments; +FullCommandMap possible_textclass_commands; +FullEnvironmentMap possible_textclass_environments; +/// used modules +LayoutModuleList used_modules; -void add_known_command(string const & command, string const & o1, - bool o2) + +void convertArgs(string const & o1, bool o2, vector & arguments) { // We have to handle the following cases: // definition o1 o2 invocation result @@ -151,7 +159,6 @@ void add_known_command(string const & command, string const & o1, // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo[x] bar x // \newcommand{\foo}[1][x]{bar #1} "[1]" true \foo[x] bar x unsigned int nargs = 0; - vector arguments; string const opt1 = rtrim(ltrim(o1, "["), "]"); if (isStrUnsignedInt(opt1)) { // The command has arguments @@ -164,7 +171,155 @@ void add_known_command(string const & command, string const & o1, } for (unsigned int i = 0; i < nargs; ++i) arguments.push_back(required); +} + + +void add_known_command(string const & command, string const & o1, + bool o2, docstring const & definition) +{ + vector arguments; + convertArgs(o1, o2, arguments); known_commands[command] = arguments; + if (!definition.empty()) + possible_textclass_commands[command] = + FullCommand(arguments, definition); +} + + +void add_known_environment(string const & environment, string const & o1, + bool o2, docstring const & beg, 
docstring const &end) +{ + vector arguments; + convertArgs(o1, o2, arguments); + known_environments[environment] = arguments; + if (!beg.empty() || ! end.empty()) + possible_textclass_environments[environment] = + FullEnvironment(arguments, beg, end); +} + + +Layout const * findLayoutWithoutModule(TextClass const & textclass, + string const & name, bool command) +{ + DocumentClass::const_iterator it = textclass.begin(); + DocumentClass::const_iterator en = textclass.end(); + for (; it != en; ++it) { + if (it->latexname() == name && + ((command && it->isCommand()) || (!command && it->isEnvironment()))) + return &*it; + } + return 0; +} + + +InsetLayout const * findInsetLayoutWithoutModule(TextClass const & textclass, + string const & name, bool command) +{ + DocumentClass::InsetLayouts::const_iterator it = textclass.insetLayouts().begin(); + DocumentClass::InsetLayouts::const_iterator en = textclass.insetLayouts().end(); + for (; it != en; ++it) { + if (it->second.latexname() == name && + ((command && it->second.latextype() == InsetLayout::COMMAND) || + (!command && it->second.latextype() == InsetLayout::ENVIRONMENT))) + return &(it->second); + } + return 0; +} + + +bool checkModule(string const & name, bool command) +{ + // Cache to avoid slowdown by repeated searches + static set failed[2]; + + // Only add the module if the command was actually defined in the LyX preamble + if (command) { + if (possible_textclass_commands.find('\\' + name) == possible_textclass_commands.end()) + return false; + } else { + if (possible_textclass_environments.find(name) == possible_textclass_environments.end()) + return false; + } + if (failed[command].find(name) != failed[command].end()) + return false; + + // Create list of dummy document classes if not already done. + // This is needed since a module cannot be read on its own, only as + // part of a document class. 
+ LayoutFile const & baseClass = LayoutFileList::get()[textclass.name()]; + typedef map ModuleMap; + static ModuleMap modules; + static bool init = true; + if (init) { + baseClass.load(); + DocumentClassBundle & bundle = DocumentClassBundle::get(); + LyXModuleList::const_iterator const end = theModuleList.end(); + LyXModuleList::const_iterator it = theModuleList.begin(); + for (; it != end; it++) { + string const module = it->getID(); + LayoutModuleList m; + // FIXME this excludes all modules that depend on another one + if (!m.moduleCanBeAdded(module, &baseClass)) + continue; + m.push_back(module); + modules[module] = &bundle.makeDocumentClass(baseClass, m); + } + init = false; + } + + // Try to find a module that defines the command. + // Only add it if the definition can be found in the preamble of the + // style that corresponds to the command. This is a heuristic and + // different from the way how we parse the builtin commands of the + // text class (in that case we only compare the name), but it is + // needed since it is not unlikely that two different modules define a + // command with the same name. + ModuleMap::iterator const end = modules.end(); + for (ModuleMap::iterator it = modules.begin(); it != end; it++) { + string const module = it->first; + if (!used_modules.moduleCanBeAdded(module, &baseClass)) + continue; + if (findLayoutWithoutModule(textclass, name, command)) + continue; + if (findInsetLayoutWithoutModule(textclass, name, command)) + continue; + DocumentClass const * c = it->second; + Layout const * layout = findLayoutWithoutModule(*c, name, command); + InsetLayout const * insetlayout = layout ? 
0 : + findInsetLayoutWithoutModule(*c, name, command); + docstring preamble; + if (layout) + preamble = layout->preamble(); + else if (insetlayout) + preamble = insetlayout->preamble(); + if (preamble.empty()) + continue; + bool add = false; + if (command) { + FullCommand const & cmd = + possible_textclass_commands['\\' + name]; + if (preamble.find(cmd.def) != docstring::npos) + add = true; + } else { + FullEnvironment const & env = + possible_textclass_environments[name]; + if (preamble.find(env.beg) != docstring::npos && + preamble.find(env.end) != docstring::npos) + add = true; + } + if (add) { + FileName layout_file = libFileSearch("layouts", module, "module"); + if (textclass.read(layout_file, TextClass::MODULE)) { + used_modules.push_back(module); + // speed up further searches: + // the module does not need to be checked anymore. + modules.erase(it); + return true; + } + } + } + failed[command].insert(name); + return false; } @@ -460,12 +615,12 @@ void tex2lyx(idocstream & is, ostream & os, string const & encoding) p.setEncoding(encoding); //p.dump(); - stringstream ss; - TeX2LyXDocClass textclass; - parse_preamble(p, ss, documentclass, textclass); + ostringstream ps; + parse_preamble(p, ps, documentclass, textclass); active_environments.push_back("document"); Context context(true, textclass); + stringstream ss; parse_text(p, ss, FLAG_END, true, context); if (Context::empty) // Empty document body. LyX needs at least one paragraph. 
@@ -473,6 +628,19 @@ void tex2lyx(idocstream & is, ostream & os, string const & encoding) context.check_end_layout(ss); ss << "\n\\end_body\n\\end_document\n"; active_environments.pop_back(); + + // We know the used modules only after parsing the full text + ostringstream ms; + if (!used_modules.empty()) { + ms << "\\begin_modules\n"; + LayoutModuleList::const_iterator const end = used_modules.end(); + LayoutModuleList::const_iterator it = used_modules.begin(); + for (; it != end; it++) + ms << *it << '\n'; + ms << "\\end_modules\n"; + } + os << subst(ps.str(), modules_placeholder, ms.str()); + ss.seekg(0); os << ss.str(); #ifdef TEST_PARSER @@ -637,6 +805,11 @@ int main(int argc, char * argv[]) if (!default_encoding.empty() && !encodings.fromLaTeXName(default_encoding)) error_message("Unknown LaTeX encoding `" + default_encoding + "'"); + // Load the layouts + LayoutFileList::get().read(); + //...and the modules + theModuleList.read(); + // The real work now. masterFilePath = onlyPath(infilename); parentFilePath = masterFilePath; diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index da3ee8015e..6a3057f188 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -38,7 +38,10 @@ class Context; /// A trivial subclass, just to give us a public default constructor class TeX2LyXDocClass : public DocumentClass -{}; +{ +public: + void setName(std::string const & name) { name_ = name; } +}; /// in preamble.cpp void parse_preamble(Parser & p, std::ostream & os, @@ -48,6 +51,7 @@ extern std::string babel2lyx(std::string const & language); /// used packages with options extern std::map > used_packages; +extern const char * const modules_placeholder; /// in text.cpp std::string translate_len(std::string const &); @@ -89,13 +93,27 @@ char const * const * is_known(std::string const &, char const * const *); /*! * Adds the command \p command to the list of known commands. 
- * \param o1 first optional parameter to the latex command \newcommand + * \param o1 first optional parameter to the latex command \\newcommand * (with brackets), or the empty string if there were no optional arguments. - * \param o2 wether \newcommand had a second optional parameter + * \param o2 whether \\newcommand had a second optional parameter. + * If \p definition is not empty the command is assumed to be from the LyX + * preamble and added to possible_textclass_commands. */ void add_known_command(std::string const & command, std::string const & o1, - bool o2); - + bool o2, docstring const & definition = docstring()); +extern void add_known_environment(std::string const & environment, + std::string const & o1, bool o2, docstring const & beg, + docstring const & end); +extern Layout const * findLayoutWithoutModule(TextClass const & textclass, + std::string const & name, bool command); +extern InsetLayout const * findInsetLayoutWithoutModule( + TextClass const & textclass, std::string const & name, bool command); +/*! + * Check whether a module provides command (if \p command is true) or + * environment (if \p command is false) \p name, and add the module to the + * list of used modules if yes. 
+ */ +extern bool checkModule(std::string const & name, bool command); // Access to environment stack extern std::vector active_environments; std::string active_environment(); @@ -107,7 +125,29 @@ enum ArgumentType { optional }; +class FullCommand { +public: + FullCommand() {} + FullCommand(std::vector const & a, docstring const & d) + : args(a), def(d) {} + std::vector args; + docstring def; +}; + +class FullEnvironment { +public: + FullEnvironment() {} + FullEnvironment(std::vector const & a, + docstring const & b, docstring const & e) + : args(a), beg(b), end(e) {} + std::vector args; + docstring beg; + docstring end; +}; + typedef std::map > CommandMap; +typedef std::map FullCommandMap; +typedef std::map FullEnvironmentMap; /// Known TeX commands with arguments that get parsed into ERT. extern CommandMap known_commands; @@ -115,6 +155,10 @@ extern CommandMap known_commands; extern CommandMap known_environments; /// Known TeX math environments with arguments that get parsed into LyX mathed. extern CommandMap known_math_environments; +/// Commands that might be defined by the document class or modules +extern FullCommandMap possible_textclass_commands; +/// Environments that might be defined by the document class or modules +extern FullEnvironmentMap possible_textclass_environments; /// extern bool noweb_mode; /// Did we recognize any pdflatex-only construct? 
diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 7f633f4a04..adda18c386 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -452,27 +452,25 @@ void handle_comment(ostream & os, string const & s, Context & context) } -Layout const * findLayout(TextClass const & textclass, string const & name) +Layout const * findLayout(TextClass const & textclass, string const & name, bool command) { - DocumentClass::const_iterator lit = textclass.begin(); - DocumentClass::const_iterator len = textclass.end(); - for (; lit != len; ++lit) - if (lit->latexname() == name) - return &*lit; - return 0; + Layout const * layout = findLayoutWithoutModule(textclass, name, command); + if (layout) + return layout; + if (checkModule(name, command)) + return findLayoutWithoutModule(textclass, name, command); + return layout; } InsetLayout const * findInsetLayout(TextClass const & textclass, string const & name, bool command) { - DocumentClass::InsetLayouts::const_iterator it = textclass.insetLayouts().begin(); - DocumentClass::InsetLayouts::const_iterator en = textclass.insetLayouts().end(); - for (; it != en; ++it) - if (it->second.latexname() == name && - ((command && it->second.latextype() == InsetLayout::COMMAND) || - (!command && it->second.latextype() == InsetLayout::ENVIRONMENT))) - return &(it->second); - return 0; + InsetLayout const * insetlayout = findInsetLayoutWithoutModule(textclass, name, command); + if (insetlayout) + return insetlayout; + if (checkModule(name, command)) + return findInsetLayoutWithoutModule(textclass, name, command); + return insetlayout; } @@ -952,6 +950,7 @@ void parse_environment(Parser & p, ostream & os, bool outer, string & last_env, Context & parent_context) { Layout const * newlayout; + InsetLayout const * newinsetlayout = 0; string const name = p.getArg('{', '}'); const bool is_starred = suffixIs(name, '*'); string const unstarred_name = rtrim(name, "*"); @@ -1069,8 +1068,7 @@ void parse_environment(Parser & p, ostream & os, bool 
outer, } // The single '=' is meant here. - else if ((newlayout = findLayout(parent_context.textclass, name)) && - newlayout->isEnvironment()) { + else if ((newlayout = findLayout(parent_context.textclass, name, false))) { eat_whitespace(p, os, parent_context, false); Context context(true, parent_context.textclass, newlayout, parent_context.layout, parent_context.font); @@ -1129,6 +1127,17 @@ void parse_environment(Parser & p, ostream & os, bool outer, p.skip_spaces(); } + // The single '=' is meant here. + else if ((newinsetlayout = findInsetLayout(parent_context.textclass, name, false))) { + eat_whitespace(p, os, parent_context, false); + parent_context.check_layout(os); + begin_inset(os, "Flex "); + os << to_utf8(newinsetlayout->name()) << '\n' + << "status collapsed\n"; + parse_text_in_inset(p, os, FLAG_END, false, parent_context, newinsetlayout); + end_inset(os); + } + else if (name == "appendix") { // This is no good latex style, but it works and is used in some documents... eat_whitespace(p, os, parent_context, false); @@ -1905,8 +1914,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, // Must attempt to parse "Section*" before "Section". else if ((p.next_token().asInput() == "*") && context.new_layout_allowed && - (newlayout = findLayout(context.textclass, t.cs() + '*')) && - newlayout->isCommand()) { + (newlayout = findLayout(context.textclass, t.cs() + '*', true))) { // write the layout p.get_token(); output_command_layout(os, p, outer, context, newlayout); @@ -1915,8 +1923,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, // Section headings and the like else if (context.new_layout_allowed && - (newlayout = findLayout(context.textclass, t.cs())) && - newlayout->isCommand()) { + (newlayout = findLayout(context.textclass, t.cs(), true))) { // write the layout output_command_layout(os, p, outer, context, newlayout); p.skip_spaces(); -- 2.39.2