]> git.lyx.org Git - lyx.git/blobdiff - src/tex2lyx/preamble.cpp
hyperref support for tex2lyx
[lyx.git] / src / tex2lyx / preamble.cpp
index 4f242c9eecb9f7c879446a5079e3d63615a7187d..1c10c6a4e528fd9b21ff3267d11b87bf049aadc5 100644 (file)
@@ -3,8 +3,8 @@
  * This file is part of LyX, the document processor.
  * Licence details can be found in the file COPYING.
  *
- * \author André Pönitz
- * \author Uwe Stöhr
+ * \author André Pönitz
+ * \author Uwe Stöhr
  *
  * Full author contact details are available in file CREDITS.
  */
 
 #include "tex2lyx.h"
 
+#include "LayoutFile.h"
 #include "Layout.h"
 #include "Lexer.h"
 #include "TextClass.h"
+
 #include "support/convert.h"
+#include "support/FileName.h"
 #include "support/filetools.h"
 #include "support/lstrings.h"
 
+#include "support/regex.h"
+
 #include <algorithm>
 #include <iostream>
 #include <sstream>
 #include <vector>
 #include <map>
 
+using namespace std;
+using namespace lyx::support;
 
-namespace lyx {
-
-using std::istringstream;
-using std::ostream;
-using std::ostringstream;
-using std::string;
-using std::vector;
-using std::cerr;
-using std::endl;
-using std::find;
 
-using support::FileName;
-using support::libFileSearch;
-using support::isStrDbl;
+namespace lyx {
 
 // special columntypes
-extern std::map<char, int> special_columns;
+extern map<char, int> special_columns;
 
-std::map<string, vector<string> > used_packages;
+map<string, vector<string> > used_packages;
 
 // needed to handle encodings with babel
 bool one_language = true;
 
-// to avoid that the babel options overwrite the documentclass options
-bool documentclass_language;
-
 namespace {
 
-const char * const known_languages[] = { "afrikaans", "american", "arabic",
-"austrian", "bahasa", "basque", "belarusian", "brazil", "breton", "british",
-"bulgarian", "canadian", "canadien", "catalan", "croatian", "czech", "danish",
-"dutch", "english", "esperanto", "estonian", "finnish", "francais", "french",
+//add this to known_languages when updating to lyxformat 266:
+// "armenian"
+//add these to known_languages when updating to lyxformat 268:
+//"chinese-simplified", "chinese-traditional", "japanese", "korean"
+// Both changes require first that support for non-babel languages (CJK,
+// armtex) is added.
+// add turkmen for lyxformat 383
+const char * const known_languages[] = { "afrikaans", "albanian", "american",
+"arabic", "arabtex", "austrian", "bahasa", "bahasai", "bahasam", "basque",
+"belarusian", "brazil", "brazilian", "breton", "british", "bulgarian",
+"canadian", "canadien", "catalan", "croatian", "czech", "danish", "dutch",
+"english", "esperanto", "estonian", "finnish", "francais", "french",
 "frenchb", "frenchle", "frenchpro", "galician", "german", "germanb", "greek",
-"hebrew", "icelandic", "irish", "italian", "lsorbian", "magyar", "naustrian",
-"ngerman", "ngermanb", "norsk", "nynorsk", "polish", "portuges", "romanian",
-"russian", "russianb", "scottish", "serbian", "slovak", "slovene", "spanish",
-"swedish", "thai", "turkish", "ukraineb", "ukrainian", "usorbian", "welsh", 0};
-
-//note this when updating to lyxformat 305:
-//bahasai, indonesian, and indon = equal to bahasa
-//malay, and meyalu = equal to bahasam
-
+"hebrew", "icelandic", "indon", "indonesian", "interlingua", "irish",
+"italian", "kazakh", "latin", "latvian", "lithuanian", "lsorbian", "magyar",
+"malay", "meyalu", "mongolian", "naustrian", "ngerman", "ngermanb", "norsk",
+"nynorsk", "polutonikogreek", "polish", "portuges", "portuguese", "romanian",
+"russian", "russianb", "samin", "scottish", "serbian", "serbian-latin",
+"slovak", "slovene", "spanish", "swedish", "thai", "turkish", "ukraineb",
+"ukrainian", "usorbian", "vietnam", "welsh", 0};
+
+const char * const known_bahasa_languages[] = {"bahasa", "bahasai", // 0-terminated synonym list; end_preamble() merges these into "bahasa"
+                                               "indon", "indonesian", 0};
+const char * const known_bahasam_languages[] = {"bahasam", "malay", // merged into "bahasam" by end_preamble()
+                                               "meyalu", 0};
+const char * const known_brazilian_languages[] = {"brazil", "brazilian", 0}; // merged into "brazilian"
 const char * const known_french_languages[] = {"french", "frenchb", "francais", // merged into "french"
                                                "frenchle", "frenchpro", 0};
 const char * const known_german_languages[] = {"german", "germanb", 0}; // merged into "german"
 const char * const known_ngerman_languages[] = {"ngerman", "ngermanb", 0}; // merged into "ngerman"
+const char * const known_portuguese_languages[] = {"portuges", "portuguese", 0}; // merged into "portuguese"
 const char * const known_russian_languages[] = {"russian", "russianb", 0}; // merged into "russian"
 const char * const known_ukrainian_languages[] = {"ukrainian", "ukraineb", 0}; // merged into "ukrainian"
 
+// Add these to known_english_quotes_languages when updating to lyxformat 268:
+// "chinese-simplified", "korean"
+// This first requires that support for non-babel languages (CJK) is added.
+const char * const known_english_quotes_languages[] = {"american", "canadian", // end_preamble() sets h_quotes_language to "english" for these
+"english", "esperanto", "hebrew", "irish", "scottish", "thai", 0};
+
+// Add this to known_french_quotes_languages when updating to
+// lyxformat 383: "turkmen"
+const char * const known_french_quotes_languages[] = {"albanian", "arabic", // end_preamble() sets h_quotes_language to "french" for these
+"basque", "canadien", "catalan", "galician", "greek", "italian", "norsk",
+"nynorsk", "polutonikogreek", "spanish", "spanish-mexico", "turkish", // NOTE(review): "spanish-mexico" is not listed in known_languages - confirm it can occur
+"vietnam", 0};
+
+const char * const known_german_quotes_languages[] = {"austrian", "bulgarian", // end_preamble() sets h_quotes_language to "german" for these
+"czech", "icelandic", "lithuanian", "lsorbian", "naustrian", "serbian",
+"serbian-latin", "slovak", "slovene", "usorbian",  0};
+
+const char * const known_polish_quotes_languages[] = {"afrikaans", "croatian", // end_preamble() sets h_quotes_language to "polish" for these
+"dutch", "estonian", "magyar", "polish", "romanian", 0};
+
+const char * const known_swedish_quotes_languages[] = {"finnish", // end_preamble() sets h_quotes_language to "swedish" for these
+"swedish", 0};
+
 char const * const known_fontsizes[] = { "10pt", "11pt", "12pt", 0 };
 
 const char * const known_roman_fonts[] = { "ae", "bookman", "charter",
@@ -91,9 +119,24 @@ const char * const known_typewriter_fonts[] = { "beramono", "cmtl", "cmtt",
 "courier", "lmtt", "luximono", "fourier", "lmodern", "mathpazo", "mathptmx",
 "newcent", 0};
 
-// some ugly stuff
+const char * const known_paper_sizes[] = { "a3paper", "b3paper", "a4paper", // 0-terminated list of paper size option names
+"b4paper", "a5paper", "b5paper", "executivepaper", "legalpaper",
+"letterpaper", 0};
+
+const char * const known_class_paper_sizes[] = { "a4paper", "a5paper", // subset of known_paper_sizes (no a3/b3/b4/b5 entries); presumably the sizes accepted as class options - confirm
+"executivepaper", "legalpaper", "letterpaper", 0};
+
+const char * const known_paper_margins[] = { "lmargin", "tmargin", "rmargin", // 0-terminated list of margin/length keys
+"bmargin", "headheight", "headsep", "footskip", "columnsep", 0};
+
+const char * const known_coded_paper_margins[] = { "leftmargin", "topmargin", // same length and order as known_paper_margins; presumably index-aligned with it - confirm against the user of both lists
+"rightmargin", "bottommargin", "headheight", "headsep", "footskip",
+"columnsep", 0};
+
+// default settings
 ostringstream h_preamble;
 string h_textclass               = "article";
+string h_use_default_options     = "false";
 string h_options                 = string();
 string h_language                = "english";
 string h_inputencoding           = "auto";
@@ -106,11 +149,29 @@ string h_font_osf                = "false";
 string h_font_sf_scale           = "100";
 string h_font_tt_scale           = "100";
 string h_graphics                = "default";
+string h_float_placement;
 string h_paperfontsize           = "default";
 string h_spacing                 = "single";
+string h_use_hyperref            = "0";
+string h_pdf_title;
+string h_pdf_author;
+string h_pdf_subject;
+string h_pdf_keywords;
+string h_pdf_bookmarks           = "1";
+string h_pdf_bookmarksnumbered   = "0";
+string h_pdf_bookmarksopen       = "0";
+string h_pdf_bookmarksopenlevel  = "1";
+string h_pdf_breaklinks          = "0";
+string h_pdf_pdfborder           = "0";
+string h_pdf_colorlinks          = "0";
+string h_pdf_backref             = "section";
+string h_pdf_pdfusetitle         = "1";
+string h_pdf_pagemode;
+string h_pdf_quoted_options;
 string h_papersize               = "default";
 string h_use_geometry            = "false";
-string h_use_amsmath             = "0";
+string h_use_amsmath             = "1";
+string h_use_esint               = "1";
 string h_cite_engine             = "basic";
 string h_use_bibtopic            = "false";
 string h_paperorientation        = "portrait";
@@ -118,19 +179,39 @@ string h_secnumdepth             = "3";
 string h_tocdepth                = "3";
 string h_paragraph_separation    = "indent";
 string h_defskip                 = "medskip";
+string h_paragraph_indentation   = "default";
 string h_quotes_language         = "english";
 string h_papercolumns            = "1";
 string h_papersides              = string();
 string h_paperpagestyle          = "default";
+string h_listings_params;
 string h_tracking_changes        = "false";
 string h_output_changes          = "false";
+string h_margins                 = "";
+
+
+/*!
+ * Translate the babel language name \p language to the name LyX uses for
+ * that language.
+ * Only "arabtex", "arabic", "lsorbian" and "usorbian" are mapped to a
+ * different name; every other input is returned unchanged.
+ */
+string babel2lyx(string language)
+{
+       if (language == "arabtex")
+               return "arabic_arabtex";
+       if (language == "arabic")
+               return "arabic_arabi";
+       if (language == "lsorbian")
+               return "lowersorbian";
+       if (language == "usorbian")
+               return "uppersorbian";
+       return language; // no special LyX name: keep the babel name
+}
 
 
-void handle_opt(vector<string> & opts, char const * const * what, string & target)
+// returns true if at least one of the options in what has been found
+bool handle_opt(vector<string> & opts, char const * const * what, string & target)
 {
        if (opts.empty())
-               return;
+               return false;
 
+       bool found = false;
        // the last language option is the document language (for babel and LyX)
        // the last size option is the document font size
        vector<string>::iterator it;
@@ -138,13 +219,14 @@ void handle_opt(vector<string> & opts, char const * const * what, string & targe
        for (; *what; ++what) {
                it = find(opts.begin(), opts.end(), *what);
                if (it != opts.end()) {
-                       documentclass_language = true;
                        if (it >= position) {
+                               found = true;
                                target = *what;
                                position = it;
                        }
                }
        }
+       return found;
 }
 
 
@@ -154,8 +236,7 @@ void delete_opt(vector<string> & opts, char const * const * what)
                return;
 
        // remove found options from the list
-       // do this after handle_opt to avoid potential memory leaks and to be able
-       // to find in every case the last language option
+       // do this after handle_opt to avoid potential memory leaks
        vector<string>::iterator it;
        for (; *what; ++what) {
                it = find(opts.begin(), opts.end(), *what);
@@ -199,6 +280,28 @@ vector<string> split_options(string const & input)
 }
 
 
+/*!
+ * Retrieve a keyval option "name={value with=sign}" named \p name from
+ * \p options and return the value.
+ * The found option is also removed from \p options.
+ * Only the first entry whose key equals \p name is handled. Entries for
+ * which split() yields fewer than two parts (no key/value pair) are
+ * skipped. An empty string is returned if no entry matches, so a missing
+ * option cannot be told apart from an option with an empty value.
+ */
+string process_keyval_opt(vector<string> & options, string name)
+{
+       for (size_t i = 0; i < options.size(); ++i) {
+               vector<string> option;
+               split(options[i], option, '='); // presumably splits at every '=' sign - confirm against the split() helper
+               if (option.size() < 2)
+                       continue; // not of the form key=value
+               if (option[0] == name) {
+                       options.erase(options.begin() + i); // consume the option
+                       option.erase(option.begin()); // drop the key, keep the value parts
+                       return join(option, "="); // glue the value parts together with "=" again
+               }
+       }
+       return "";
+}
+
+
 /*!
  * Add package \p name with options \p options to used_packages.
  * Remove options from \p options that we don't want to output.
@@ -232,30 +335,131 @@ string const scale_as_percentage(string const & scale)
                        return convert<string>(100 * convert<double>(value));
        }
        // If the input string didn't match our expectations.
-       // return the default value "100" 
+       // return the default value "100"
        return "100";
 }
 
 
-void handle_package(string const & name, string const & opts)
+string remove_braces(string const & value) // returns \p value without one enclosing {...} pair, if it has one
+{
+       if (value.empty())
+               return value; // nothing to strip; also guards the value[0] access below
+       if (value[0] == '{' && value[value.length()-1] == '}')
+               return value.substr(1, value.length()-2); // drop the first and the last character
+       return value; // not enclosed in braces: returned unchanged
+}
+
+
+void handle_hyperref(vector<string> & options) // translates hyperref package options into the h_use_hyperref / h_pdf_* settings; \p options is empty afterwards
+{
+       // FIXME swallow inputencoding changes that might surround the
+       //       hyperref setup if it was written by LyX
+       h_use_hyperref = "1";
+       // swallow "unicode=true", since LyX always writes that
+       vector<string>::iterator it =
+               find(options.begin(), options.end(), "unicode=true");
+       if (it != options.end())
+               options.erase(it);
+       it = find(options.begin(), options.end(), "pdfusetitle");
+       if (it != options.end()) {
+               h_pdf_pdfusetitle = "1"; // NOTE(review): the initial value of h_pdf_pdfusetitle is already "1", so a missing pdfusetitle option is not recorded - confirm this is intended
+               options.erase(it);
+       }
+       string bookmarks = process_keyval_opt(options, "bookmarks"); // every recognized key is removed from \p options while it is read
+       if (bookmarks == "true")
+               h_pdf_bookmarks = "1";
+       else if (bookmarks == "false")
+               h_pdf_bookmarks = "0"; // any other value (or a missing option) keeps the current setting; same pattern for the boolean options below
+       if (h_pdf_bookmarks == "1") { // the bookmark sub-options are only read while bookmarks are enabled
+               string bookmarksnumbered =
+                       process_keyval_opt(options, "bookmarksnumbered");
+               if (bookmarksnumbered == "true")
+                       h_pdf_bookmarksnumbered = "1";
+               else if (bookmarksnumbered == "false")
+                       h_pdf_bookmarksnumbered = "0";
+               string bookmarksopen =
+                       process_keyval_opt(options, "bookmarksopen");
+               if (bookmarksopen == "true")
+                       h_pdf_bookmarksopen = "1";
+               else if (bookmarksopen == "false")
+                       h_pdf_bookmarksopen = "0";
+               if (h_pdf_bookmarksopen == "1") { // the open level is only read if bookmarks are opened
+                       string bookmarksopenlevel =
+                               process_keyval_opt(options, "bookmarksopenlevel");
+                       if (!bookmarksopenlevel.empty())
+                               h_pdf_bookmarksopenlevel = bookmarksopenlevel;
+               }
+       }
+       string breaklinks = process_keyval_opt(options, "breaklinks");
+       if (breaklinks == "true")
+               h_pdf_breaklinks = "1";
+       else if (breaklinks == "false")
+               h_pdf_breaklinks = "0";
+       string pdfborder = process_keyval_opt(options, "pdfborder"); // NOTE(review): any other pdfborder value is removed from \p options but stored nowhere - confirm
+       if (pdfborder == "{0 0 0}")
+               h_pdf_pdfborder = "1"; // "{0 0 0}" is stored as "1"
+       else if (pdfborder == "{0 0 1}")
+               h_pdf_pdfborder = "0"; // "{0 0 1}" is stored as "0"
+       string backref = process_keyval_opt(options, "backref");
+       if (!backref.empty())
+               h_pdf_backref = backref; // stored verbatim, no validation of the value
+       string colorlinks = process_keyval_opt(options, "colorlinks");
+       if (colorlinks == "true")
+               h_pdf_colorlinks = "1";
+       else if (colorlinks == "false")
+               h_pdf_colorlinks = "0";
+       string pdfpagemode = process_keyval_opt(options, "pdfpagemode");
+       if (!pdfpagemode.empty())
+               h_pdf_pagemode = pdfpagemode;
+       string pdftitle = process_keyval_opt(options, "pdftitle");
+       if (!pdftitle.empty()) {
+               h_pdf_title = remove_braces(pdftitle); // the text options are stored without their enclosing braces
+       }
+       string pdfauthor = process_keyval_opt(options, "pdfauthor");
+       if (!pdfauthor.empty()) {
+               h_pdf_author = remove_braces(pdfauthor);
+       }
+       string pdfsubject = process_keyval_opt(options, "pdfsubject");
+       if (!pdfsubject.empty())
+               h_pdf_subject = remove_braces(pdfsubject);
+       string pdfkeywords = process_keyval_opt(options, "pdfkeywords");
+       if (!pdfkeywords.empty())
+               h_pdf_keywords = remove_braces(pdfkeywords);
+       if (!options.empty()) { // whatever was not consumed above is kept verbatim as quoted options
+               if (!h_pdf_quoted_options.empty())
+                       h_pdf_quoted_options += ','; // append to what is already stored
+               h_pdf_quoted_options += join(options, ",");
+               options.clear(); // leave no options behind for the caller
+       }
+}
+
+
+void handle_package(Parser &p, string const & name, string const & opts,
+                   bool in_lyx_preamble)
 {
        vector<string> options = split_options(opts);
        add_package(name, options);
        string scale;
 
        // roman fonts
-       if (is_known(name, known_roman_fonts))
+       if (is_known(name, known_roman_fonts)) {
                h_font_roman = name;
+               p.skip_spaces();
+       }
+
        if (name == "fourier") {
                h_font_roman = "utopia";
                // when font uses real small capitals
                if (opts == "expert")
                        h_font_sc = "true";
        }
+
        if (name == "mathpazo")
                h_font_roman = "palatino";
+
        if (name == "mathptmx")
                h_font_roman = "times";
+
        // sansserif fonts
        if (is_known(name, known_sans_fonts)) {
                h_font_sans = name;
@@ -264,6 +468,7 @@ void handle_package(string const & name, string const & opts)
                        h_font_sf_scale = scale_as_percentage(scale);
                }
        }
+
        // typewriter fonts
        if (is_known(name, known_typewriter_fonts)) {
                h_font_typewriter = name;
@@ -272,12 +477,17 @@ void handle_package(string const & name, string const & opts)
                        h_font_tt_scale = scale_as_percentage(scale);
                }
        }
+
        // font uses old-style figure
        if (name == "eco")
                h_font_osf = "true";
 
        else if (name == "amsmath" || name == "amssymb")
-               h_use_amsmath = "1";
+               h_use_amsmath = "2";
+
+       else if (name == "esint")
+               h_use_esint = "2";
+
        else if (name == "babel" && !opts.empty()) {
                // check if more than one option was used - used later for inputenc
                // in case inputenc is parsed before babel, set the encoding to auto
@@ -285,58 +495,74 @@ void handle_package(string const & name, string const & opts)
                        one_language = false;
                        h_inputencoding = "auto";
                }
-               // only set the document language when there was not already one set
-               // via the documentclass options
-               // babel takes the the last language given in the documentclass options
-               // as document language. If there is no such language option, the last
-               // option of its \usepackage call is used.
-               if (documentclass_language == false) {
-                       handle_opt(options, known_languages, h_language);
-                       delete_opt(options, known_languages);
-                       if (is_known(h_language, known_french_languages))
-                               h_language = "french";
-                       else if (is_known(h_language, known_german_languages))
-                               h_language = "german";
-                       else if (is_known(h_language, known_ngerman_languages))
-                               h_language = "ngerman";
-                       else if (is_known(h_language, known_russian_languages))
-                               h_language = "russian";
-                       else if (is_known(h_language, known_ukrainian_languages))
-                               h_language = "ukrainian";
-                       h_quotes_language = h_language;
-               }
+               // babel takes the last language of the option of its \usepackage
+               // call as document language. If there is no such language option, the
+               // last language in the documentclass options is used.
+               handle_opt(options, known_languages, h_language);
+               delete_opt(options, known_languages);
        }
+
        else if (name == "fontenc")
-               ; // ignore this
+                ;// ignore this
+
        else if (name == "inputenc") {
-               // only set when there is not more than one inputenc option
-               // therefore check for the "," character
-               // also only set when there is not more then one babel language option
+               // h_inputencoding is only set when there is not more than one
+               // inputenc option, because otherwise h_inputencoding must be
+               // set to "auto" (the default encoding of the document language).
+               // Therefore check for the "," character.
+               // It is also only set when there is not more than one babel
+               // language option, but this is handled in the routine for babel.
                if (opts.find(",") == string::npos && one_language == true)
                        h_inputencoding = opts;
+               if (!options.empty())
+                       p.setEncoding(options.back());
                options.clear();
-       } else if (name == "makeidx")
+       }
+
+       else if (name == "makeidx")
+               ; // ignore this
+
+       else if (name == "prettyref")
+               ; // ignore this
+
+       else if (name == "varioref")
                ; // ignore this
+
        else if (name == "verbatim")
                ; // ignore this
+
+       else if (name == "nomencl")
+               ; // ignore this
+
+       else if (name == "textcomp")
+               ; // ignore this
+
+       else if (name == "url")
+               ; // ignore this
+
+       else if (LYX_FORMAT >= 408 && name == "subscript")
+               ; // ignore this
+
+       else if (name == "color") {
+               // with the following command this package is only loaded when needed for
+               // undefined colors, since we only support the predefined colors
+               h_preamble << "\\@ifundefined{definecolor}\n {\\usepackage{color}}{}\n";
+       }
+
        else if (name == "graphicx")
                ; // ignore this
-       else if (is_known(name, known_languages)) {
-               if (is_known(name, known_french_languages))
-                       h_language = "french";
-               else if (is_known(name, known_german_languages))
-                       h_language = "german";
-               else if (is_known(name, known_ngerman_languages))
-                       h_language = "ngerman";
-               else if (is_known(name, known_russian_languages))
-                       h_language = "russian";
-               else if (is_known(name, known_ukrainian_languages))
-                       h_language = "ukrainian";
-               else
-                       h_language = name;
-               h_quotes_language = h_language;
-
-       } else if (name == "natbib") {
+
+       else if (name == "setspace")
+               ; // ignore this
+
+       else if (name == "geometry")
+               ; // Ignore this, the geometry settings are made by the \geometry
+                 // command. This command is handled below.
+
+       else if (is_known(name, known_languages))
+               h_language = name;
+
+       else if (name == "natbib") {
                h_cite_engine = "natbib_authoryear";
                vector<string>::iterator it =
                        find(options.begin(), options.end(), "authoryear");
@@ -349,27 +575,98 @@ void handle_package(string const & name, string const & opts)
                                options.erase(it);
                        }
                }
-       } else if (name == "jurabib") {
+       }
+
+       else if (name == "jurabib")
                h_cite_engine = "jurabib";
-       } else if (options.empty())
-               h_preamble << "\\usepackage{" << name << "}\n";
-       else {
-               h_preamble << "\\usepackage[" << opts << "]{" << name << "}\n";
-               options.clear();
+
+       else if (name == "hyperref")
+               handle_hyperref(options);
+
+       else if (!in_lyx_preamble) {
+               if (options.empty())
+                       h_preamble << "\\usepackage{" << name << "}";
+               else {
+                       h_preamble << "\\usepackage[" << opts << "]{" 
+                                  << name << "}";
+                       options.clear();
+               }
        }
 
        // We need to do something with the options...
        if (!options.empty())
                cerr << "Ignoring options '" << join(options, ",")
                     << "' of package " << name << '.' << endl;
+
+       // remove the whitespace
+       p.skip_spaces();
 }
 
 
 
 void end_preamble(ostream & os, TextClass const & /*textclass*/)
 {
+       // merge synonym languages
+       if (is_known(h_language, known_bahasa_languages))
+               h_language = "bahasa";
+       else if (is_known(h_language, known_bahasam_languages))
+               h_language = "bahasam";
+       else if (is_known(h_language, known_brazilian_languages))
+               h_language = "brazilian";
+       else if (is_known(h_language, known_french_languages))
+               h_language = "french";
+       else if (is_known(h_language, known_german_languages))
+               h_language = "german";
+       else if (is_known(h_language, known_ngerman_languages))
+               h_language = "ngerman";
+       else if (is_known(h_language, known_portuguese_languages))
+               h_language = "portuguese";
+       else if (is_known(h_language, known_russian_languages))
+               h_language = "russian";
+       else if (is_known(h_language, known_ukrainian_languages))
+               h_language = "ukrainian";
+
+       // set the quote language
+       // LyX only knows the following quotes languages:
+       // english, swedish, german, polish, french and danish
+       // (quotes for "japanese" and "chinese-traditional" are missing because
+       //  they wouldn't be useful: http://www.lyx.org/trac/ticket/6383)
+       // conversion list taken from
+       // http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage
+       // (quotes for kazakh and interlingua are unknown)
+       // danish
+       if (h_language == "danish")
+               h_quotes_language = "danish";
+       // french
+       else if (is_known(h_language, known_french_quotes_languages)
+               || is_known(h_language, known_french_languages)
+               || is_known(h_language, known_russian_languages)
+               || is_known(h_language, known_ukrainian_languages))
+               h_quotes_language = "french";
+       // german
+       else if (is_known(h_language, known_german_quotes_languages)
+               || is_known(h_language, known_german_languages)
+               || is_known(h_language, known_ngerman_languages))
+               h_quotes_language = "german";
+       // polish
+       else if (is_known(h_language, known_polish_quotes_languages))
+               h_quotes_language = "polish";
+       // swedish
+       else if (is_known(h_language, known_swedish_quotes_languages))
+               h_quotes_language = "swedish";
+       //english
+       else if (is_known(h_language, known_english_quotes_languages)
+               || is_known(h_language, known_bahasa_languages)
+               || is_known(h_language, known_bahasam_languages)
+               || is_known(h_language, known_brazilian_languages)
+               || is_known(h_language, known_portuguese_languages))
+               h_quotes_language = "english";
+
+       h_language = babel2lyx(h_language);
+
+       // output the LyX file settings
        os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
-          << "\\lyxformat 247\n"
+          << "\\lyxformat " << LYX_FORMAT << '\n'
           << "\\begin_document\n"
           << "\\begin_header\n"
           << "\\textclass " << h_textclass << "\n";
@@ -377,7 +674,8 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
                os << "\\begin_preamble\n" << h_preamble.str() << "\n\\end_preamble\n";
        if (!h_options.empty())
                os << "\\options " << h_options << "\n";
-       os << "\\language " << h_language << "\n"
+       os << "\\use_default_options " << h_use_default_options << "\n"
+          << "\\language " << h_language << "\n"
           << "\\inputencoding " << h_inputencoding << "\n"
           << "\\font_roman " << h_font_roman << "\n"
           << "\\font_sans " << h_font_sans << "\n"
@@ -387,24 +685,57 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
           << "\\font_osf " << h_font_osf << "\n"
           << "\\font_sf_scale " << h_font_sf_scale << "\n"
           << "\\font_tt_scale " << h_font_tt_scale << "\n"
-          << "\\graphics " << h_graphics << "\n"
-          << "\\paperfontsize " << h_paperfontsize << "\n"
+          << "\\graphics " << h_graphics << "\n";
+       if (!h_float_placement.empty())
+               os << "\\float_placement " << h_float_placement << "\n";
+       os << "\\paperfontsize " << h_paperfontsize << "\n"
           << "\\spacing " << h_spacing << "\n"
-          << "\\papersize " << h_papersize << "\n"
+          << "\\use_hyperref " << h_use_hyperref << '\n';
+       if (h_use_hyperref == "1") {
+               if (!h_pdf_title.empty())
+                       os << "\\pdf_title \"" << h_pdf_title << "\"\n";
+               if (!h_pdf_author.empty())
+                       os << "\\pdf_author \"" << h_pdf_author << "\"\n";
+               if (!h_pdf_subject.empty())
+                       os << "\\pdf_subject \"" << h_pdf_subject << "\"\n";
+               if (!h_pdf_keywords.empty())
+                       os << "\\pdf_keywords \"" << h_pdf_keywords << "\"\n";
+               os << "\\pdf_bookmarks " << h_pdf_bookmarks << "\n"
+                     "\\pdf_bookmarksnumbered " << h_pdf_bookmarksnumbered << "\n"
+                     "\\pdf_bookmarksopen " << h_pdf_bookmarksopen << "\n"
+                     "\\pdf_bookmarksopenlevel " << h_pdf_bookmarksopenlevel << "\n"
+                     "\\pdf_breaklinks " << h_pdf_breaklinks << "\n"
+                     "\\pdf_pdfborder " << h_pdf_pdfborder << "\n"
+                     "\\pdf_colorlinks " << h_pdf_colorlinks << "\n"
+                     "\\pdf_backref " << h_pdf_backref << "\n"
+                     "\\pdf_pdfusetitle " << h_pdf_pdfusetitle << '\n';
+               if (!h_pdf_pagemode.empty())
+                       os << "\\pdf_pagemode " << h_pdf_pagemode << '\n';
+               if (!h_pdf_quoted_options.empty())
+                       os << "\\pdf_quoted_options \"" << h_pdf_quoted_options << "\"\n";
+       }
+       os << "\\papersize " << h_papersize << "\n"
           << "\\use_geometry " << h_use_geometry << "\n"
           << "\\use_amsmath " << h_use_amsmath << "\n"
+          << "\\use_esint " << h_use_esint << "\n"
           << "\\cite_engine " << h_cite_engine << "\n"
           << "\\use_bibtopic " << h_use_bibtopic << "\n"
           << "\\paperorientation " << h_paperorientation << "\n"
+          << h_margins
           << "\\secnumdepth " << h_secnumdepth << "\n"
           << "\\tocdepth " << h_tocdepth << "\n"
-          << "\\paragraph_separation " << h_paragraph_separation << "\n"
-          << "\\defskip " << h_defskip << "\n"
-          << "\\quotes_language " << h_quotes_language << "\n"
+          << "\\paragraph_separation " << h_paragraph_separation << "\n";
+       if (LYX_FORMAT < 365 || h_paragraph_separation == "skip")
+               os << "\\defskip " << h_defskip << "\n";
+       else
+               os << "\\paragraph_indentation " << h_paragraph_indentation << "\n";
+       os << "\\quotes_language " << h_quotes_language << "\n"
           << "\\papercolumns " << h_papercolumns << "\n"
           << "\\papersides " << h_papersides << "\n"
-          << "\\paperpagestyle " << h_paperpagestyle << "\n"
-          << "\\tracking_changes " << h_tracking_changes << "\n"
+          << "\\paperpagestyle " << h_paperpagestyle << "\n";
+       if (!h_listings_params.empty())
+               os << "\\listings_params " << h_listings_params << "\n";
+       os << "\\tracking_changes " << h_tracking_changes << "\n"
           << "\\output_changes " << h_output_changes << "\n"
           << "\\end_header\n\n"
           << "\\begin_body\n";
@@ -414,11 +745,14 @@ void end_preamble(ostream & os, TextClass const & /*textclass*/)
 
 } // anonymous namespace
 
-TextClass const parse_preamble(Parser & p, ostream & os, string const & forceclass)
+void parse_preamble(Parser & p, ostream & os, 
+       string const & forceclass, TeX2LyXDocClass & tc)
 {
        // initialize fixed types
        special_columns['D'] = 3;
        bool is_full_document = false;
+       bool is_lyx_file = false;
+       bool in_lyx_preamble = false;
 
        // determine whether this is a full document or a fragment for inclusion
        while (p.good()) {
@@ -441,37 +775,68 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                //
                // cat codes
                //
-               if (t.cat() == catLetter ||
-                         t.cat() == catSuper ||
-                         t.cat() == catSub ||
-                         t.cat() == catOther ||
-                         t.cat() == catMath ||
-                         t.cat() == catActive ||
-                         t.cat() == catBegin ||
-                         t.cat() == catEnd ||
-                         t.cat() == catAlign ||
-                         t.cat() == catParameter)
-               h_preamble << t.character();
-
-               else if (t.cat() == catSpace || t.cat() == catNewline)
+               if (!in_lyx_preamble &&
+                   (t.cat() == catLetter ||
+                    t.cat() == catSuper ||
+                    t.cat() == catSub ||
+                    t.cat() == catOther ||
+                    t.cat() == catMath ||
+                    t.cat() == catActive ||
+                    t.cat() == catBegin ||
+                    t.cat() == catEnd ||
+                    t.cat() == catAlign ||
+                    t.cat() == catParameter))
+                       h_preamble << t.cs();
+
+               else if (!in_lyx_preamble && 
+                        (t.cat() == catSpace || t.cat() == catNewline))
                        h_preamble << t.asInput();
 
-               else if (t.cat() == catComment)
-                       h_preamble << t.asInput();
+               else if (t.cat() == catComment) {
+                       // regexes that recognize the LyX-generated header comment and the user-preamble marker
+                       static regex const islyxfile("%% LyX .* created this file");
+                       static regex const usercommands("User specified LaTeX commands");
+
+                       string const comment = t.asInput();
+
+                       // magically switch encoding default if it looks like XeLaTeX
+                       static string const magicXeLaTeX =
+                               "% This document must be compiled with XeLaTeX ";
+                       if (comment.size() > magicXeLaTeX.size() 
+                                 && comment.substr(0, magicXeLaTeX.size()) == magicXeLaTeX
+                                 && h_inputencoding == "auto") {
+                               cerr << "XeLaTeX comment found, switching to UTF8\n";
+                               h_inputencoding = "utf8";
+                       }
+                       smatch sub;
+                       if (regex_search(comment, sub, islyxfile)) {
+                               is_lyx_file = true;
+                               in_lyx_preamble = true;
+                       } else if (is_lyx_file
+                                  && regex_search(comment, sub, usercommands))
+                               in_lyx_preamble = false;
+                       else if (!in_lyx_preamble)
+                               h_preamble << t.asInput();
+               }
 
                else if (t.cs() == "pagestyle")
                        h_paperpagestyle = p.verbatim_item();
 
                else if (t.cs() == "makeatletter") {
+                       // LyX takes care of this
                        p.setCatCode('@', catLetter);
                }
 
                else if (t.cs() == "makeatother") {
+                       // LyX takes care of this
                        p.setCatCode('@', catOther);
                }
 
                else if (t.cs() == "newcommand" || t.cs() == "renewcommand"
-                           || t.cs() == "providecommand") {
+                           || t.cs() == "providecommand"
+                               || t.cs() == "DeclareRobustCommand"
+                               || t.cs() == "ProvideTextCommandDefault"
+                               || t.cs() == "DeclareMathAccent") {
                        bool star = false;
                        if (p.next_token().character() == '*') {
                                p.get_token();
@@ -485,34 +850,19 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                        if (name == "\\rmdefault")
                                if (is_known(body, known_roman_fonts))
                                        h_font_roman = body;
-
                        if (name == "\\sfdefault")
                                if (is_known(body, known_sans_fonts))
                                        h_font_sans = body;
-
                        if (name == "\\ttdefault")
                                if (is_known(body, known_typewriter_fonts))
                                        h_font_typewriter = body;
-
                        if (name == "\\familydefault") {
                                string family = body;
                                // remove leading "\"
                                h_font_default_family = family.erase(0,1);
                        }
                        // only non-lyxspecific stuff
-                       if (   name != "\\noun"
-                           && name != "\\tabularnewline"
-                           && name != "\\LyX"
-                           && name != "\\lyxline"
-                           && name != "\\lyxaddress"
-                           && name != "\\lyxrightaddress"
-                           && name != "\\lyxdot"
-                           && name != "\\boldsymbol"
-                           && name != "\\lyxarrow"
-                           && name != "\\rmdefault"
-                           && name != "\\sfdefault"
-                           && name != "\\ttdefault"
-                           && name != "\\familydefault") {
+                       if (!in_lyx_preamble) {
                                ostringstream ss;
                                ss << '\\' << t.cs();
                                if (star)
@@ -532,6 +882,7 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                }
 
                else if (t.cs() == "documentclass") {
+                       vector<string>::iterator it;
                        vector<string> opts = split_options(p.getArg('[', ']'));
                        handle_opt(opts, known_fontsizes, h_paperfontsize);
                        delete_opt(opts, known_fontsizes);
@@ -539,21 +890,45 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                        string::size_type i = h_paperfontsize.find("pt");
                        if (i != string::npos)
                                h_paperfontsize.erase(i);
-                       // to avoid that the babel options overwrite the documentclass options
-                       documentclass_language = false;
+                       // The documentclass options are always parsed before the options
+                       // of the babel call so that a language cannot overwrite the babel
+                       // options.
                        handle_opt(opts, known_languages, h_language);
                        delete_opt(opts, known_languages);
-                       if (is_known(h_language, known_french_languages))
-                               h_language = "french";
-                       else if (is_known(h_language, known_german_languages))
-                               h_language = "german";
-                       else if (is_known(h_language, known_ngerman_languages))
-                               h_language = "ngerman";
-                       else if (is_known(h_language, known_russian_languages))
-                               h_language = "russian";
-                       else if (is_known(h_language, known_ukrainian_languages))
-                               h_language = "ukrainian";
-                       h_quotes_language = h_language;
+                       
+                       // paper orientation
+                       if ((it = find(opts.begin(), opts.end(), "landscape")) != opts.end()) {
+                               h_paperorientation = "landscape";
+                               opts.erase(it);
+                       }
+                       // paper sides
+                       if ((it = find(opts.begin(), opts.end(), "oneside"))
+                                != opts.end()) {
+                               h_papersides = "1";
+                               opts.erase(it);
+                       }
+                       if ((it = find(opts.begin(), opts.end(), "twoside"))
+                                != opts.end()) {
+                               h_papersides = "2";
+                               opts.erase(it);
+                       }
+                       // paper columns
+                       if ((it = find(opts.begin(), opts.end(), "onecolumn"))
+                                != opts.end()) {
+                               h_papercolumns = "1";
+                               opts.erase(it);
+                       }
+                       if ((it = find(opts.begin(), opts.end(), "twocolumn"))
+                                != opts.end()) {
+                               h_papercolumns = "2";
+                               opts.erase(it);
+                       }
+                       // paper sizes
+                       // some size options are known to all document classes, other sizes
+                       // are handled by the \geometry command of the geometry package
+                       handle_opt(opts, known_class_paper_sizes, h_papersize);
+                       delete_opt(opts, known_class_paper_sizes);
+                       // the remaining options
                        h_options = join(opts, ",");
                        h_textclass = p.getArg('{', '}');
                }
@@ -561,16 +936,19 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                else if (t.cs() == "usepackage") {
                        string const options = p.getArg('[', ']');
                        string const name = p.getArg('{', '}');
-                       if (options.empty() && name.find(',')) {
-                               vector<string> vecnames;
-                               split(name, vecnames, ',');
-                               vector<string>::const_iterator it  = vecnames.begin();
-                               vector<string>::const_iterator end = vecnames.end();
-                               for (; it != end; ++it)
-                                       handle_package(trim(*it), string());
-                       } else {
-                               handle_package(name, options);
-                       }
+                       vector<string> vecnames;
+                       split(name, vecnames, ',');
+                       vector<string>::const_iterator it  = vecnames.begin();
+                       vector<string>::const_iterator end = vecnames.end();
+                       for (; it != end; ++it)
+                               handle_package(p, trim(*it), options, 
+                                              in_lyx_preamble);
+               }
+
+               else if (t.cs() == "inputencoding") {
+                       string const encoding = p.getArg('{','}');
+                       h_inputencoding = encoding;
+                       p.setEncoding(encoding);
                }
 
                else if (t.cs() == "newenvironment") {
@@ -581,18 +959,17 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                        ss << p.getOpt();
                        ss << '{' << p.verbatim_item() << '}';
                        ss << '{' << p.verbatim_item() << '}';
-                       if (name != "lyxcode" && name != "lyxlist" &&
-                           name != "lyxrightadress" &&
-                           name != "lyxaddress" && name != "lyxgreyedout")
+                       if (!in_lyx_preamble)
                                h_preamble << ss.str();
                }
 
                else if (t.cs() == "def") {
                        string name = p.get_token().cs();
                        while (p.next_token().cat() != catBegin)
-                               name += p.get_token().asString();
-                       h_preamble << "\\def\\" << name << '{'
-                                  << p.verbatim_item() << "}";
+                               name += p.get_token().cs();
+                       if (!in_lyx_preamble)
+                               h_preamble << "\\def\\" << name << '{'
+                                          << p.verbatim_item() << "}";
                }
 
                else if (t.cs() == "newcolumntype") {
@@ -625,15 +1002,37 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                else if (t.cs() == "setlength") {
                        string const name = p.verbatim_item();
                        string const content = p.verbatim_item();
-                       // Is this correct?
-                       if (name == "parskip")
-                               h_paragraph_separation = "skip";
-                       else if (name == "parindent")
-                               h_paragraph_separation = "skip";
-                       else
+                       // paragraphs are left unindented only when \parindent is set to zero
+                       if (name == "\\parindent" && content != "") {
+                               if (content[0] == '0')
+                                       h_paragraph_separation = "skip";
+                               else if (LYX_FORMAT >= 365)
+                                       h_paragraph_indentation = translate_len(content);
+                               else
+                                       h_preamble << "\\setlength{" << name
+                                                  << "}{" << content << "}";
+                       } else if (name == "\\parskip") {
+                               if (content == "\\smallskipamount")
+                                       h_defskip = "smallskip";
+                               else if (content == "\\medskipamount")
+                                       h_defskip = "medskip";
+                               else if (content == "\\bigskipamount")
+                                       h_defskip = "bigskip";
+                               else
+                                       h_defskip = content;
+                       } else
                                h_preamble << "\\setlength{" << name << "}{" << content << "}";
                }
 
+               else if (t.cs() == "onehalfspacing")
+                       h_spacing = "onehalf";
+
+               else if (t.cs() == "doublespacing")
+                       h_spacing = "double";
+
+               else if (t.cs() == "setstretch")
+                       h_spacing = "other " + p.verbatim_item();
+
                else if (t.cs() == "begin") {
                        string const name = p.getArg('{', '}');
                        if (name == "document")
@@ -641,7 +1040,39 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                        h_preamble << "\\begin{" << name << "}";
                }
 
+               else if (t.cs() == "geometry") {
+                       h_use_geometry = "true";
+                       vector<string> opts = split_options(p.getArg('{', '}'));
+                       vector<string>::iterator it;
+                       // paper orientation
+                       if ((it = find(opts.begin(), opts.end(), "landscape")) != opts.end()) {
+                               h_paperorientation = "landscape";
+                               opts.erase(it);
+                       }
+                       // paper size
+                       handle_opt(opts, known_paper_sizes, h_papersize);
+                       delete_opt(opts, known_paper_sizes);
+                       // page margins
+                       char const * const * margin = known_paper_margins;
+                       int k = -1;
+                       for (; *margin; ++margin) {
+                               k += 1;
+                               // search for the "=" in e.g. "lmargin=2cm" to get the value
+                               for(size_t i = 0; i != opts.size(); i++) {
+                                       if (opts.at(i).find(*margin) != string::npos) {
+                                               string::size_type pos = opts.at(i).find("=");
+                                               string value = opts.at(i).substr(pos + 1);
+                                               string name = known_coded_paper_margins[k];
+                                               h_margins += "\\" + name + " " + value + "\n";
+                                       }
+                               }
+                       }
+               }
+
                else if (t.cs() == "jurabibsetup") {
+                       // FIXME p.getArg('{', '}') is most probably wrong (it
+                       //       does not handle nested braces).
+                       //       Use p.verbatim_item() instead.
                        vector<string> jurabibsetup =
                                split_options(p.getArg('{', '}'));
                        // add jurabibsetup to the jurabib package options
@@ -652,30 +1083,41 @@ TextClass const parse_preamble(Parser & p, ostream & os, string const & forcecla
                        }
                }
 
-               else if (!t.cs().empty())
+               else if (t.cs() == "hypersetup") {
+                       vector<string> hypersetup =
+                               split_options(p.verbatim_item());
+                       // add hypersetup to the hyperref package options
+                       handle_hyperref(hypersetup);
+                       if (!hypersetup.empty()) {
+                               h_preamble << "\\hypersetup{"
+                                          << join(hypersetup, ",") << '}';
+                       }
+               }
+
+               else if (!t.cs().empty() && !in_lyx_preamble)
                        h_preamble << '\\' << t.cs();
        }
+
+       // remove the whitespace
        p.skip_spaces();
 
        // Force textclass if the user wanted it
        if (!forceclass.empty())
                h_textclass = forceclass;
-       if (noweb_mode && !lyx::support::prefixIs(h_textclass, "literate-"))
+       if (noweb_mode && !prefixIs(h_textclass, "literate-"))
                h_textclass.insert(0, "literate-");
        FileName layoutfilename = libFileSearch("layouts", h_textclass, "layout");
        if (layoutfilename.empty()) {
                cerr << "Error: Could not find layout file for textclass \"" << h_textclass << "\"." << endl;
                exit(1);
        }
-       TextClass textclass;
-       textclass.read(layoutfilename);
+       tc.read(layoutfilename);
        if (h_papersides.empty()) {
                ostringstream ss;
-               ss << textclass.sides();
+               ss << tc.sides();
                h_papersides = ss.str();
        }
-       end_preamble(os, textclass);
-       return textclass;
+       end_preamble(os, tc);
 }
 
 // }])