From 7eca5d94d1dcdceb8f9a904918642e8370550690 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Uwe=20St=C3=B6hr?= Date: Fri, 8 Jun 2012 02:37:36 +0200 Subject: [PATCH] - support a bunch of new languages (fileformat change) - tex2lyx support for - fontspec fonts - polyglossia --- lib/languages | 181 ++++++++++++++----------- lib/lyx2lyx/lyx_2_1.py | 48 ++++++- src/BufferParams.cpp | 18 ++- src/tex2lyx/Preamble.cpp | 40 +++++- src/tex2lyx/Preamble.h | 1 + src/tex2lyx/test/XeTeX-polyglossia.tex | 44 ++++++ src/tex2lyx/text.cpp | 47 ++++++- src/version.h | 4 +- 8 files changed, 291 insertions(+), 92 deletions(-) create mode 100644 src/tex2lyx/test/XeTeX-polyglossia.tex diff --git a/lib/languages b/lib/languages index 769f706ef0..25169dc851 100644 --- a/lib/languages +++ b/lib/languages @@ -75,6 +75,7 @@ End # Real languages # +# not yet supported by polyglossia Language afrikaans GuiName "Afrikaans" BabelName afrikaans @@ -99,13 +100,14 @@ Language american LangCode en_US End -# Currently not supported (file format change!) 
-# Language ancientgreek -# GuiName "Greek (ancient)" -# PolyglossiaName greek -# PolyglossiaOpts variant=ancient -# LangCode grc_GR -# End +# not supported by babel +Language ancientgreek + GuiName "Greek (ancient)" + PolyglossiaName greek + PolyglossiaOpts variant=ancient + Encoding iso8859-7 + LangCode grc_GR +End # FIXME: dummy babel language for arabic_arabtex to be able # to switch the language the way of the ArabTeX-package @@ -127,6 +129,7 @@ Language arabic_arabi LangCode ar_SA End +# not supported by babel Language armenian GuiName "Armenian" PolyglossiaName armenian @@ -143,6 +146,7 @@ Language australian LangCode en_AU End +# not yet supported by polyglossia Language austrian GuiName "German (Austria, old spelling)" BabelName austrian @@ -150,6 +154,7 @@ Language austrian LangCode de_AT End +# not yet supported by polyglossia Language naustrian GuiName "German (Austria)" BabelName naustrian @@ -181,6 +186,7 @@ Language basque LangCode eu_ES End +# not yet supported by polyglossia Language belarusian GuiName "Belarusian" BabelName belarusian @@ -264,12 +270,13 @@ Language chinese-traditional LangCode zh_CN End -# Currently not supported (file format change!) -# Language coptic -# GuiName "Coptic" -# PolyglossiaName coptic -# LangCode cop_EG -# End +# not supported by babel +Language coptic + GuiName "Coptic" + PolyglossiaName coptic + Encoding utf8 + LangCode cop_EG +End Language croatian GuiName "Croatian" @@ -295,12 +302,13 @@ Language danish LangCode da_DK End -# Currently not supported (file format change!) -# Language divehi -# GuiName "Divehi (Maldivian)" -# PolyglossiaName divehi -# LangCode dv_MV -# End +# not supported by babel +Language divehi + GuiName "Divehi (Maldivian)" + PolyglossiaName divehi + Encoding utf8 + LangCode dv_MV +End Language dutch GuiName "Dutch" @@ -443,13 +451,13 @@ Language hebrew LangCode he_IL End -# Currently not supported (file format change!) 
-# Language hindi -# GuiName "Hindi" -# PolyglossiaName hindi -# Encoding utf8 -# LangCode hi_IN -# End +# not supported by babel +Language hindi + GuiName "Hindi" + PolyglossiaName hindi + Encoding utf8 + LangCode hi_IN +End # Currently not supported (file format change!) # "hungarian" is a synonym for the "magyar" babel language option @@ -514,6 +522,7 @@ Language japanese-cjk LangCode ja_JP End +# not yet supported by polyglossia Language kazakh GuiName "Kazakh" BabelName kazakh @@ -527,20 +536,21 @@ Language korean LangCode ko_KR End -# Currently not supported (file format change!) -# Language kurmanji -# GuiName "Kurmanji" -# PolyglossiaName kurmanji -# Encoding utf8 -# LangCode kmr -# End +# not supported by babel +Language kurmanji + GuiName "Kurmanji" + PolyglossiaName kurmanji + Encoding utf8 + LangCode kmr +End -# Currently not supported (file format change!) -# Language lao -# GuiName "Lao" -# PolyglossiaName lao -# LangCode lo_LA -# End +# not supported by babel +Language lao + GuiName "Lao" + PolyglossiaName lao + Encoding utf8 + LangCode lo_LA +End # There is no country code for Latin because it is a dead language. # We therefore the name of its hunspell dictionary. @@ -590,15 +600,17 @@ Language magyar LangCode hu_HU End -# Currently not supported (file format change!) -# Language marathi -# GuiName "Marathi" -# PolyglossiaName marathi -# LangCode mr_IN -# End +# not supported by babel +Language marathi + GuiName "Marathi" + PolyglossiaName marathi + Encoding utf8 + LangCode mr_IN +End # mongolian must be loaded locally with babel options, # not globally via class options +# not yet supported by polyglossia Language mongolian GuiName "Mongolian" BabelName mongolian @@ -632,12 +644,13 @@ Language nynorsk LangCode nn_NO End -# Currently not supported (file format change!) 
-# Language occitan -# GuiName "Occitan" -# PolyglossiaName occitan -# LangCode oc_FR -# End +# not supported by babel +Language occitan + GuiName "Occitan" + PolyglossiaName occitan + Encoding utf8 + LangCode oc_FR +End # Currently not supported (file format change!) # Russian orthography from the Petrine orthographic reforms of @@ -689,13 +702,13 @@ Language samin LangCode se_NO End -# Currently not supported (file format change!) -# Language sanskrit -# GuiName "Sanskrit" -# PolyglossiaName sanskrit -# Encoding utf8 -# LangCode sa_IN -# End +# not supported by babel +Language sanskrit + GuiName "Sanskrit" + PolyglossiaName sanskrit + Encoding utf8 + LangCode sa_IN +End Language scottish GuiName "Scottish" @@ -769,26 +782,29 @@ Language swedish LangCode sv_SE End -# Currently not supported (file format change!) -# Language syriac -# GuiName "Syriac" -# PolyglossiaName syriac -# LangCode syr_SY -# End +# not supported by babel +Language syriac + GuiName "Syriac" + PolyglossiaName syriac + Encoding utf8 + LangCode syr_SY +End -# Currently not supported (file format change!) -# Language tamil -# GuiName "Tamil" -# PolyglossiaName tamil -# LangCode ta_IN -# End +# not supported by babel +Language tamil + GuiName "Tamil" + PolyglossiaName tamil + Encoding utf8 + LangCode ta_IN +End -# Currently not supported (file format change!) -# Language telugu -# GuiName "Telugu" -# PolyglossiaName telugu -# LangCode te_IN -# End +# not supported by babel +Language telugu + GuiName "Telugu" + PolyglossiaName telugu + Encoding utf8 + LangCode te_IN +End Language thai GuiName "Thai" @@ -839,12 +855,13 @@ Language uppersorbian LangCode hsb_DE End -# Currently not supported (file format change!) 
-# Language urdu -# GuiName "Urdu" -# PolyglossiaName urdu -# LangCode ur_PK -# End +# not supported by babel +Language urdu + GuiName "Urdu" + PolyglossiaName urdu + Encoding utf8 + LangCode ur_PK +End # vietnam must be loaded locally with babel options, # not globally via class options, see diff --git a/lib/lyx2lyx/lyx_2_1.py b/lib/lyx2lyx/lyx_2_1.py index 395f011098..41cdec3ab4 100644 --- a/lib/lyx2lyx/lyx_2_1.py +++ b/lib/lyx2lyx/lyx_2_1.py @@ -214,15 +214,14 @@ def revert_australian(document): "Set English language variants Australian and Newzealand to English" if document.language == "australian" or document.language == "newzealand": - document.language = "english" + document.language = "english" i = find_token(document.header, "\\language", 0) if i != -1: document.header[i] = "\\language english" - j = 0 while True: j = find_token(document.body, "\\lang australian", j) - if j == -1: + if j == -1: j = find_token(document.body, "\\lang newzealand", 0) if j == -1: return @@ -767,6 +766,47 @@ def revert_use_amssymb(document): add_to_preamble(document, ["\\usepackage{amssymb}"]) +def revert_ancientgreek(document): + "Set the document language for ancientgreek to greek" + + if document.language == "ancientgreek": + document.language = "greek" + i = find_token(document.header, "\\language", 0) + if i != -1: + document.header[i] = "\\language greek" + j = 0 + while True: + j = find_token(document.body, "\\lang ancientgreek", j) + if j == -1: + return + else: + document.body[j] = document.body[j].replace("\\lang ancientgreek", "\\lang greek") + j += 1 + + +def revert_languages(document): + "Set the document language for new supported languages to English" + + languages = [ + "coptic", "divehi", "hindi", "kurmanji", "lao", "marathi", "occitan", "sanskrit", + "syriac", "tamil", "telugu", "urdu" + ] + for n in range(len(languages)): + if document.language == languages[n]: + document.language = "english" + i = find_token(document.header, "\\language", 0) + if i != -1: + 
document.header[i] = "\\language english" + j = 0 + while j < len(document.body): + j = find_token(document.body, "\\lang " + languages[n], j) + if j != -1: + document.body[j] = document.body[j].replace("\\lang " + languages[n], "\\lang english") + j += 1 + else: + j = len(document.body) + + ## # Conversion hub # @@ -791,9 +831,11 @@ convert = [ [429, [convert_table_rotation]], [430, [convert_listoflistings]], [431, [convert_use_amssymb]], + [432, []] ] revert = [ + [431, [revert_languages, revert_ancientgreek]], [430, [revert_use_amssymb]], [429, [revert_listoflistings]], [428, [revert_table_rotation]], diff --git a/src/BufferParams.cpp b/src/BufferParams.cpp index bfee4cff3f..b1a61b9b40 100644 --- a/src/BufferParams.cpp +++ b/src/BufferParams.cpp @@ -1244,8 +1244,22 @@ void BufferParams::validate(LaTeXFeatures & features) const features.require("color"); } - if (features.runparams().flavor == OutputParams::XETEX - && useNonTeXFonts) + // some languages are only available via polyglossia + if ( (features.runparams().flavor == OutputParams::XETEX + || language->lang() == "ancientgreek" + || language->lang() == "coptic" + || language->lang() == "divehi" + || language->lang() == "hindi" + || language->lang() == "kurmanji" + || language->lang() == "lao" + || language->lang() == "marathi" + || language->lang() == "occitan" + || language->lang() == "sanskrit" + || language->lang() == "syriac" + || language->lang() == "tamil" + || language->lang() == "telugu" + || language->lang() == "urdu" + ) && useNonTeXFonts) features.require("polyglossia"); if (language->lang() == "vietnamese") diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 131f1d8a47..a0210d1795 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -167,8 +167,9 @@ const char * const known_if_3arg_commands[] = {"@ifundefined", "IfFileExists", 0}; /// packages that work only in xetex +/// polyglossia is handled separately const char * const known_xetex_packages[] = 
{"arabxetex", "fixlatvian", -"fontbook", "fontwrap", "mathspec", "philokalia", "polyglossia", "unisugar", +"fontbook", "fontwrap", "mathspec", "philokalia", "unisugar", "xeCJK", "xecolor", "xecyr", "xeindex", "xepersian", "xunicode", 0}; /// packages that are automatically skipped if loaded by LyX @@ -417,6 +418,7 @@ Preamble::Preamble() : one_language(true), title_layout_found(false) h_font_sf_scale = "100"; h_font_tt_scale = "100"; h_graphics = "default"; + h_default_output_format = "default"; h_html_be_strict = "false"; h_html_css_as_file = "0"; h_html_math_output = "0"; @@ -687,6 +689,15 @@ void Preamble::handle_package(Parser &p, string const & name, h_preamble << "\\usepackage{babel}\n"; } + else if (name == "polyglossia") { + h_language_package = "default"; + h_default_output_format = "pdf4"; + h_use_non_tex_fonts = "true"; + xetex = true; + if (h_inputencoding == "auto") + p.setEncoding("utf8"); + } + else if (name == "fontenc") { h_fontencoding = getStringFromVector(options, ","); /* We could do the following for better round trip support, @@ -904,7 +915,8 @@ bool Preamble::writeLyXHeader(ostream & os, bool subdoc) << "\\font_osf " << h_font_osf << "\n" << "\\font_sf_scale " << h_font_sf_scale << "\n" << "\\font_tt_scale " << h_font_tt_scale << "\n" - << "\\graphics " << h_graphics << "\n"; + << "\\graphics " << h_graphics << "\n" + << "\\default_output_format " << h_default_output_format << "\n"; if (!h_float_placement.empty()) os << "\\float_placement " << h_float_placement << "\n"; os << "\\paperfontsize " << h_paperfontsize << "\n" @@ -1057,6 +1069,30 @@ void Preamble::parse(Parser & p, string const & forceclass, else if (t.cs() == "pagestyle") h_paperpagestyle = p.verbatim_item(); + else if (t.cs() == "setdefaultlanguage") + h_language = p.verbatim_item(); + + else if (t.cs() == "setotherlanguage") + ; + + else if (t.cs() == "setmainfont") { + // we don't care about the option + p.hasOpt() ? 
p.getOpt() : string(); + h_font_roman = p.getArg('{', '}'); + } + + else if (t.cs() == "setsansfont") { + // we don't care about the option + p.hasOpt() ? p.getOpt() : string(); + h_font_sans = p.getArg('{', '}'); + } + + else if (t.cs() == "setmonofont") { + // we don't care about the option + p.hasOpt() ? p.getOpt() : string(); + h_font_typewriter = p.getArg('{', '}'); + } + else if (t.cs() == "date") { string argument = p.getArg('{', '}'); if (argument.empty()) diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h index 5fe6c572bd..5690e1903c 100644 --- a/src/tex2lyx/Preamble.h +++ b/src/tex2lyx/Preamble.h @@ -105,6 +105,7 @@ private: std::string h_font_sf_scale; std::string h_font_tt_scale; std::string h_graphics; + std::string h_default_output_format; std::string h_html_be_strict; std::string h_html_css_as_file; std::string h_html_math_output; diff --git a/src/tex2lyx/test/XeTeX-polyglossia.tex b/src/tex2lyx/test/XeTeX-polyglossia.tex new file mode 100644 index 0000000000..3ea7c1e53a --- /dev/null +++ b/src/tex2lyx/test/XeTeX-polyglossia.tex @@ -0,0 +1,44 @@ +%% LyX 2.1.0svn created this file. For more info, see http://www.lyx.org/. +%% Do not edit unless you really know what you are doing. 
+\documentclass[albanian]{article} +\usepackage{fontspec} +\setmainfont[Mapping=tex-text]{Aharoni} +\setsansfont[Mapping=tex-text]{AngsanaUPC} +\setmonofont{Arial Black} +\usepackage{xunicode} +\usepackage{polyglossia} +\setdefaultlanguage{coptic} +\setotherlanguage{albanian} +\begin{document} + +\part{df} + +1 + +\begin{albanian}% + +\part{df} + +2 + +\end{albanian}% +\begin{divehi}% + +\part{df} + +3 + +\end{divehi}% +\begin{syriac}% + +\part{df} + +4 + +\end{syriac}% +\begin{divehi}% + +\part{df} + +5 +\end{document} diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 46d4d678b4..7e19940634 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -117,6 +117,45 @@ char const * const known_ref_commands[] = { "ref", "pageref", "vref", char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref", "vpageref", "formatted", "eqref", 0 }; +/** + * known polyglossia language names (including synonyms) + */ +const char * const polyglossia_languages[] = { +"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi", +"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil", +"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch", +"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish", +"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin", +"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil", +"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french", +"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak", +"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek", +"marathi", "spanish", 0}; + +/** + * the same as polyglossia_languages with .lyx names + * please keep this in sync with polyglossia_languages line by line! 
+ * (entries are LyX language names, e.g. polyglossia "portuges" is "portuguese" in LyX) */ +const char * const coded_polyglossia_languages[] = { +"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi", +"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil", +"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch", +"irish", "portuguese", "thai", "bahasa", "english", "italian", "romanian", "turkish", +"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin", +"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian", +"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french", +"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak", +"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek", +"marathi", "spanish", 0}; + +string polyglossia2lyx(string const & language) +{ + char const * const * where = is_known(language, polyglossia_languages); + if (where) + return coded_polyglossia_languages[where - polyglossia_languages]; + return language; +} + /*! * natbib commands. 
* The starred forms are also known except for "citefullauthor", @@ -1212,6 +1251,13 @@ void parse_environment(Parser & p, ostream & os, bool outer, } } + else if (is_known(name, polyglossia_languages)) { + parent_context.check_layout(os); + parent_context.font.language = polyglossia2lyx(name); + os << "\n\\lang " << parent_context.font.language << "\n"; + p.skip_spaces(); + } + else if (unstarred_name == "tabular" || name == "longtable") { eat_whitespace(p, os, parent_context, false); string width = "0pt"; @@ -3401,7 +3447,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context.check_layout(os); // save the language for the case that a // \foreignlanguage is used - context.font.language = babel2lyx(p.verbatim_item()); os << "\n\\lang " << context.font.language << "\n"; } diff --git a/src/version.h b/src/version.h index 5cd0fb136d..86dc4b451a 100644 --- a/src/version.h +++ b/src/version.h @@ -30,8 +30,8 @@ extern char const * const lyx_version_info; // Do not remove the comment below, so we get merge conflict in // independent branches. Instead add your own. -#define LYX_FORMAT_LYX 431 // gb: load switch for amssymb -#define LYX_FORMAT_TEX2LYX 431 // gb: load switch for amssymb +#define LYX_FORMAT_LYX 432 // uwestoehr: support for many languages like Telugu +#define LYX_FORMAT_TEX2LYX 432 // uwestoehr: support for polyglossia/XeTeX #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #ifndef _MSC_VER -- 2.39.5