3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * Full author contact details are available in file CREDITS.
19 #include "lyxtextclass.h"
21 #include "support/convert.h"
22 #include "support/filetools.h"
23 #include "support/fs_extras.h"
24 #include "support/lstrings.h"
25 #include "support/lyxlib.h"
26 #include "support/os.h"
27 #include "support/package.h"
28 #include "support/unicode.h"
30 #include <boost/function.hpp>
31 #include <boost/filesystem/operations.hpp>
32 #include <boost/filesystem/path.hpp>
52 using std::istringstream;
53 using std::ostringstream;
54 using std::stringstream;
59 using lyx::support::changeExtension;
60 using lyx::support::isStrUnsignedInt;
61 using lyx::support::ltrim;
62 using lyx::support::makeAbsPath;
63 using lyx::support::onlyPath;
64 using lyx::support::rtrim;
65 using lyx::support::isFileReadable;
67 namespace fs = boost::filesystem;
// Accessor for a function-local static IconvProcessor constructed with
// (ucs4_codeset, "UTF-8").
// NOTE(review): the braces and the return statement are not visible in this
// excerpt; presumably the function returns a reference to `iconv` -- confirm.
70 IconvProcessor & utf8ToUcs4()
72 static IconvProcessor iconv(ucs4_codeset, "UTF-8");
77 // Hacks to allow the thing to link in the lyxlayout stuff
// Defines the global `lyxerr` object on top of std::cerr's stream buffer.
78 LyXErr lyxerr(std::cerr.rdbuf());
// Return the part of \p a that lies between the first and the last character
// that is NOT contained in the character set \p p (both ends inclusive),
// i.e. \p a with leading and trailing characters from \p p removed.
// NOTE(review): any early-exit checks before line 88 and the statement guarded
// by the npos test are not visible in this excerpt; presumably an empty string
// is returned when \p a consists only of characters from \p p -- confirm.
81 string const trim(string const & a, char const * p)
// r: index of the last character to keep, l: index of the first one.
88 string::size_type r = a.find_last_not_of(p);
89 string::size_type l = a.find_first_not_of(p);
91 // Is this the minimal test? (lgb)
92 if (r == string::npos && l == string::npos)
// Keep the closed range [l, r].
95 return a.substr(l, r - l + 1);
// Cut \p s into pieces at every occurrence of \p delim; the pieces are
// extracted one by one with getline() from the stream `is` into `t`.
// NOTE(review): the declarations of `is` and `t` and the loop body are not
// visible in this excerpt; presumably `is` is a string stream over \p s and
// each piece is appended to \p result -- confirm.
99 void split(string const & s, vector<string> & result, char delim)
101 //cerr << "split 1: '" << s << "'\n";
104 while (getline(is, t, delim))
106 //cerr << "split 2\n";
// Build a single string from the elements of \p input, visited in index order.
// NOTE(review): the loop body and the return statement are not visible in this
// excerpt; presumably \p delim is inserted between consecutive elements --
// confirm.
110 string join(vector<string> const & input, char const * delim)
113 for (size_t i = 0; i < input.size(); ++i) {
// Walk through the array \p what. The loop stops at the first null pointer,
// so \p what must be terminated by a null entry.
// NOTE(review): the comparison and the return statements are not visible in
// this excerpt; presumably a pointer to the entry equal to \p str is returned,
// or a null pointer if there is none -- confirm.
122 char const * const * is_known(string const & str, char const * const * what)
124 for ( ; *what; ++what)
132 // current stack of nested environments
133 vector<string> active_environments;
// Name of the innermost environment, i.e. the top of active_environments,
// or an empty string if the stack is empty.
136 string active_environment()
138 return active_environments.empty() ? string() : active_environments.back();
// Tables that map a name to its list of arguments (vector<ArgumentType>).
// known_commands is filled by add_known_command() and read_syntaxfile();
// known_math_environments is filled by read_syntaxfile() through
// read_environment().
// NOTE(review): known_environments is presumably filled the same way for the
// "environments"/"reLyXre" sections of the syntax file -- the call argument is
// not visible in this excerpt.
142 CommandMap known_commands;
143 CommandMap known_environments;
144 CommandMap known_math_environments;
// Register \p command in known_commands, with an argument list derived from
// its \newcommand definition.
// \p o1 is the first optional argument of \newcommand, brackets included
// (e.g. "[1]"), or empty. `o2` tells whether a second optional argument (the
// default value of the first argument) was given; see the table below.
// NOTE(review): the declaration of the `o2` parameter is not visible in this
// excerpt.
147 void add_known_command(string const & command, string const & o1,
150 // We have to handle the following cases:
151 // definition o1 o2 invocation result
152 // \newcommand{\foo}{bar} "" false \foo bar
153 // \newcommand{\foo}[1]{bar #1} "[1]" false \foo{x} bar x
154 // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo bar
155 // \newcommand{\foo}[1][]{bar #1} "[1]" true \foo[x] bar x
156 // \newcommand{\foo}[1][x]{bar #1} "[1]" true \foo[x] bar x
157 unsigned int nargs = 0;
158 vector<ArgumentType> arguments;
// Strip the surrounding brackets, leaving only the argument count.
159 string const opt1 = rtrim(ltrim(o1, "["), "]");
160 if (isStrUnsignedInt(opt1)) {
161 // The command has arguments
162 nargs = convert<unsigned int>(opt1);
163 if (nargs > 0 && o2) {
164 // The first argument is optional
165 arguments.push_back(optional);
// NOTE(review): the line(s) between 165 and 169 are not visible here;
// presumably nargs is decremented so that the optional argument is not
// counted a second time by the loop below -- confirm.
169 for (unsigned int i = 0; i < nargs; ++i)
170 arguments.push_back(required);
// Overwrites any earlier entry for the same command.
171 known_commands[command] = arguments;
// Off by default.
// NOTE(review): presumably switched on by the "-n" command line switch
// (parse_noweb); the assignment is not visible in this excerpt.
175 bool noweb_mode = false;
182 * Read one command definition from the syntax file
// The argument specification that follows the command name is consumed from
// \p p and stored as commands[command] (replacing an existing entry).
// Each braced group becomes a `required` argument if its content is
// "translate"; the other visible branch for braced groups pushes `verbatim`.
// An `optional` argument is pushed in the remaining branch of the loop.
// NOTE(review): the else lines, the handling of a "*" following the command
// name and the consumption of "[...]" groups are not visible in this excerpt.
184 void read_command(Parser & p, string command, CommandMap & commands) {
185 if (p.next_token().asInput() == "*") {
189 vector<ArgumentType> arguments;
// Continue as long as the next token opens a braced group or is "[".
190 while (p.next_token().cat() == catBegin ||
191 p.next_token().asInput() == "[") {
192 if (p.next_token().cat() == catBegin) {
193 string const arg = p.getArg('{', '}');
194 if (arg == "translate")
195 arguments.push_back(required);
197 arguments.push_back(verbatim);
200 arguments.push_back(optional);
203 commands[command] = arguments;
208 * Read a class of environments from the syntax file
// Tokens are taken from \p p one at a time. Letter tokens are appended to the
// name collected in `environment`; as soon as a different token follows a
// non-empty name, read_command() reads the argument specification of that
// environment and stores it in \p environments.
// When an "\end" token is met, its braced argument is read into `end`.
// NOTE(review): the enclosing loop, the reset of `environment` and the use of
// `end` are not visible in this excerpt; presumably reading stops when `end`
// equals \p begin -- confirm.
210 void read_environment(Parser & p, string const & begin,
211 CommandMap & environments)
215 Token const & t = p.get_token();
216 if (t.cat() == catLetter)
217 environment += t.asInput();
218 else if (!environment.empty()) {
220 read_command(p, environment, environments);
223 if (t.cat() == catEscape && t.asInput() == "\\end") {
224 string const end = p.getArg('{', '}');
233 * Read a list of TeX commands from a reLyX compatible syntax file.
234 * Since this list is used after all commands that have a LyX counterpart
235 * are handled, it does not matter that the "syntax.default" file
236 * has almost all of them listed. For the same reason the reLyX-specific
237 * reLyXre environment is ignored.
// \p file_name is opened with an ifstream; an error message is written to
// cerr if that fails.
// NOTE(review): the stream check, what happens after the error message, the
// construction of the parser `p` and the enclosing loop are not visible in
// this excerpt.
239 void read_syntaxfile(string const & file_name)
241 ifstream is(file_name.c_str());
243 cerr << "Could not open syntax file \"" << file_name
244 << "\" for reading." << endl;
247 // We can use our TeX parser, since the syntax of the layout file is
248 // modeled after TeX.
249 // Unknown tokens are just silently ignored; this helps us to skip some
250 // reLyX specific things.
253 Token const & t = p.get_token();
// Only control sequences are of interest here.
254 if (t.cat() == catEscape) {
255 string const command = t.asInput();
256 if (command == "\\begin") {
257 string const name = p.getArg('{', '}');
258 if (name == "environments" || name == "reLyXre")
259 // We understand "reLyXre", but it is
260 // not as powerful as "environments".
// NOTE(review): the last argument of this call is not visible in this
// excerpt; presumably it is known_environments -- confirm.
261 read_environment(p, name,
263 else if (name == "mathenvironments")
264 read_environment(p, name,
265 known_math_environments);
// A control sequence handled as a command definition.
267 read_command(p, command, known_commands);
// Passed to parse_preamble() when a document is converted.
// NOTE(review): presumably set from the argument of the "-c" switch -- the
// assignment is not visible in this excerpt.
274 string documentclass;
// Set to true by parse_force(); consulted in tex2lyx() when the output file
// is found to be readable already.
276 bool overwrite_files = false;
// Signature of a command line switch handler. easyParse() calls it with the
// two command line words that follow the switch (empty strings if absent).
279 /// return the number of arguments consumed
280 typedef boost::function<int(string const &, string const &)> cmd_helper;
// Handler registered for the "-help" and "--help" switches: writes the usage
// summary to cerr. Both arguments are unused.
// NOTE(review): what happens after the message is printed is not visible in
// this excerpt.
283 int parse_help(string const &, string const &)
285 cerr << "Usage: tex2lyx [ command line switches ] <infile.tex> [<outfile.lyx>]\n"
286 "Command line switches (case sensitive):\n"
287 "\t-help summarize tex2lyx usage\n"
288 "\t-f Force creation of .lyx files even if they exist already\n"
289 "\t-userdir dir try to set user directory to dir\n"
290 "\t-sysdir dir try to set system directory to dir\n"
291 "\t-c textclass declare the textclass\n"
292 "\t-n translate a noweb (aka literate programming) file.\n"
293 "\t-s syntaxfile read additional syntax file" << endl;
// Handler registered for the "-c" switch. \p arg is the command line word
// that follows the switch; the second argument is unused.
// NOTE(review): only the error message is visible in this excerpt; presumably
// it is printed when \p arg is empty, and otherwise \p arg is stored as the
// text class and 1 is returned -- confirm.
298 int parse_class(string const & arg, string const &)
301 cerr << "Missing textclass string after -c switch" << endl;
// Handler registered for the "-s" switch. \p arg is the command line word
// that follows the switch; the second argument is unused.
// NOTE(review): only the error message is visible in this excerpt; presumably
// it is printed when \p arg is empty, and otherwise \p arg is remembered as
// the name of the additional syntax file and 1 is returned -- confirm.
309 int parse_syntaxfile(string const & arg, string const &)
312 cerr << "Missing syntaxfile string after -s switch" << endl;
320 // Filled with the command line arguments "foo" of "-sysdir foo" or "-userdir foo".
// Both values are handed to init_package() in main().
322 string cl_system_support;
323 string cl_user_support;
// Handler registered for the "-sysdir" switch: stores \p arg (the command
// line word following the switch) in cl_system_support. The second argument
// is unused.
// NOTE(review): the condition guarding the error message and the return
// value are not visible in this excerpt.
326 int parse_sysdir(string const & arg, string const &)
329 cerr << "Missing directory for -sysdir switch" << endl;
332 cl_system_support = arg;
// Handler registered for the "-userdir" switch: stores \p arg (the command
// line word following the switch) in cl_user_support. The second argument is
// unused.
// NOTE(review): the condition guarding the error message and the return
// value are not visible in this excerpt.
337 int parse_userdir(string const & arg, string const &)
340 cerr << "Missing directory for -userdir switch" << endl;
343 cl_user_support = arg;
// Handler registered for the "-f" switch: allows existing output files to be
// overwritten. Both arguments are unused.
// NOTE(review): the return value is not visible in this excerpt.
348 int parse_force(string const &, string const &)
350 overwrite_files = true;
// Handler registered for the "-n" switch. Both arguments are unused.
// NOTE(review): the body is not visible in this excerpt; presumably it turns
// noweb_mode on -- confirm.
355 int parse_noweb(string const &, string const &)
// Process the command line switches known to tex2lyx and remove them (and the
// arguments they consumed) from \p argv, so that only the remaining words are
// left for the caller.
// NOTE(review): the adjustment of \p argc and of the index `i` after a switch
// has been removed is not visible in this excerpt; the shifting loop reads
// argv[j + remove], so \p argc is presumably reduced by `remove` before that
// loop runs -- confirm.
362 void easyParse(int & argc, char * argv[])
// Maps every switch to its handler.
364 map<string, cmd_helper> cmdmap;
366 cmdmap["-c"] = parse_class;
367 cmdmap["-f"] = parse_force;
368 cmdmap["-s"] = parse_syntaxfile;
369 cmdmap["-help"] = parse_help;
370 cmdmap["--help"] = parse_help;
371 cmdmap["-n"] = parse_noweb;
372 cmdmap["-sysdir"] = parse_sysdir;
373 cmdmap["-userdir"] = parse_userdir;
// argv[0] is skipped.
375 for (int i = 1; i < argc; ++i) {
376 std::map<string, cmd_helper>::const_iterator it
377 = cmdmap.find(argv[i]);
379 // don't complain if not found - may be parsed later
380 if (it == cmdmap.end())
// The next two words, or empty strings if the command line ends earlier.
383 string arg((i + 1 < argc) ? argv[i + 1] : "");
384 string arg2((i + 2 < argc) ? argv[i + 2] : "");
// The switch itself plus the number of arguments the handler consumed.
386 int const remove = 1 + it->second(arg, arg2);
388 // Now, remove used arguments by shifting
389 // the following ones remove places down.
391 for (int j = i; j < argc; ++j)
392 argv[j] = argv[j + remove];
398 // path of the first parsed file
// Set once in main() from the input file name; read via getMasterFilePath().
399 string masterFilePath;
400 // path of the currently parsed file
// Changed temporarily by tex2lyx() while a file is converted; read via
// getParentFilePath().
401 string parentFilePath;
403 } // anonymous namespace
// Returns a copy of masterFilePath (the path of the first parsed file).
406 string getMasterFilePath()
408 return masterFilePath;
// Returns a copy of parentFilePath (the path of the currently parsed file).
411 string getParentFilePath()
413 return parentFilePath;
420 * Reads tex input from \a is and writes lyx output to \a os.
421 * Uses some common settings for the preamble, so this should only
422 * be used more than once for included documents.
423 * Caution: Overwrites the existing preamble settings if the new document
424 * contains a preamble.
425 * You must ensure that \p parentFilePath is properly set before calling this function.
// The preamble is parsed first, which yields the text class; the document
// body is then parsed with "document" pushed onto the environment stack, and
// finally the closing \end_body / \end_document markers are written.
// NOTE(review): the construction of the parser `p` from \p is, the
// declaration of `ss` and the transfer of `ss` to \p os are not visible in
// this excerpt.
428 void tex2lyx(std::istream &is, std::ostream &os)
434 LyXTextClass textclass = parse_preamble(p, ss, documentclass);
// The body is parsed inside the "document" environment.
436 active_environments.push_back("document");
437 Context context(true, textclass);
438 parse_text(p, ss, FLAG_END, true, context);
440 // Empty document body. LyX needs at least one paragraph.
441 context.check_layout(ss);
442 context.check_end_layout(ss);
443 ss << "\n\\end_body\n\\end_document\n";
444 active_environments.pop_back();
// Parser self test: the tokens are written back out as TeX.
// NOTE(review): presumably compiled only in a test configuration and run in
// a loop over all tokens; neither the guard nor the loop is visible here.
449 ofstream parsertest("parsertest.tex");
451 parsertest << p.get_token().asInput();
452 // <origfile> and parsertest.tex should now have identical content
457 /// convert TeX from \p infilename to LyX and write it to \p os
// \p infilename must be an absolute path (asserted). While the file is being
// handled, parentFilePath is set to its directory; the previous value is
// restored afterwards.
// NOTE(review): the stream check, the call that does the conversion and the
// return statements are not visible in this excerpt.
458 bool tex2lyx(string const &infilename, std::ostream &os)
460 BOOST_ASSERT(lyx::support::absolutePath(infilename));
461 ifstream is(infilename.c_str());
463 cerr << "Could not open input file \"" << infilename
464 << "\" for reading." << endl;
// Remember the old value so that it can be restored below.
467 string const oldParentFilePath = parentFilePath;
468 parentFilePath = onlyPath(infilename);
470 parentFilePath = oldParentFilePath;
474 } // anonymous namespace
// Convert the TeX file \p infilename to the LyX file \p outfilename.
// Progress and error messages go to cerr. The result is the return value of
// the stream based tex2lyx() overload that is called at the end.
// NOTE(review): the else lines and the early returns (after the "Not
// overwriting" and "Could not open" messages) are not visible in this
// excerpt.
477 bool tex2lyx(string const &infilename, string const &outfilename)
// An already readable output file is only replaced if overwrite_files is set.
479 if (isFileReadable(outfilename)) {
480 if (overwrite_files) {
481 cerr << "Overwriting existing file "
482 << outfilename << endl;
484 cerr << "Not overwriting existing file "
485 << outfilename << endl;
489 cerr << "Creating file " << outfilename << endl;
491 ofstream os(outfilename.c_str());
493 cerr << "Could not open output file \"" << outfilename
494 << "\" for writing." << endl;
498 cerr << "Input file: " << infilename << "\n";
499 cerr << "Output file: " << outfilename << "\n";
501 return tex2lyx(infilename, os);
// Program entry point: parses the command line switches, initializes the
// LyX support library, reads the syntax file(s) and converts the input file.
// NOTE(review): the argument count checks, the declaration of `outfilename`
// and all return statements are not visible in this excerpt.
507 int main(int argc, char * argv[])
510 fs::path::default_name_check(fs::no_check);
// Removes all known switches from argv and adjusts argc.
512 easyParse(argc, argv);
515 cerr << "Usage: tex2lyx [ command line switches ] <infile.tex> [<outfile.lyx>]\n"
516 "See tex2lyx -help." << endl;
520 lyx::support::os::init(argc, argv);
// The directories given with -sysdir / -userdir are passed on here.
521 lyx::support::init_package(argv[0], cl_system_support, cl_user_support,
522 lyx::support::top_build_dir_is_two_levels_up);
524 // Now every known option is parsed. Look for input and output
525 // file name (the latter is optional).
526 string const infilename = makeAbsPath(argv[1]);
// An output name of "-" is kept as it is (it selects cout below); any other
// name is made absolute.
529 outfilename = argv[2];
530 if (outfilename != "-")
531 outfilename = makeAbsPath(argv[2]);
// Branch that derives the output name from the input name.
533 outfilename = changeExtension(infilename, ".lyx");
// The system syntax file is mandatory.
535 string const system_syntaxfile = lyx::support::libFileSearch("", "syntax.default");
536 if (system_syntaxfile.empty()) {
537 cerr << "Error: Could not find syntax file \"syntax.default\"." << endl;
540 read_syntaxfile(system_syntaxfile);
// An additional syntax file is read after the system one.
541 if (!syntaxfile.empty())
542 read_syntaxfile(syntaxfile);
// The input file is both the master file and the first parent file.
544 masterFilePath = onlyPath(infilename);
545 parentFilePath = masterFilePath;
// "-" writes the result to standard output instead of a file.
547 if (outfilename == "-") {
548 if (tex2lyx(infilename, cout))
553 if (tex2lyx(infilename, outfilename))