X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx2lyx_tools.py;h=9c4fe0bb0bb0db2262babef1e187c5c64b205bb0;hb=6ab3be039ee0d74bbb7782bae8e1e0b278d14b3d;hp=75a5c820def0b75b958f5b13505e2780ff08fcec;hpb=abf38e1820a6e491fe19211538436b0680866879;p=lyx.git diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py index 75a5c820de..9c4fe0bb0b 100644 --- a/lib/lyx2lyx/lyx2lyx_tools.py +++ b/lib/lyx2lyx/lyx2lyx_tools.py @@ -84,14 +84,16 @@ insert_document_option(document, option): remove_document_option(document, option): Remove _option_ as a document option. -revert_language(document, lyxname, babelname, polyglossianame): +revert_language(document, lyxname, babelname="", polyglossianame=""): Reverts native language support to ERT If babelname or polyglossianame is empty, it is assumed this language package is not supported for the given language. ''' -import re -from parser_tools import find_token, find_end_of_inset, get_containing_layout, get_value, get_bool_value +from __future__ import print_function +import re, sys +from parser_tools import (find_token, find_end_of_inset, get_containing_layout, + get_containing_inset, get_value, get_bool_value) from unicode_symbols import unicode_reps # This will accept either a list of lines or a single line. @@ -145,24 +147,38 @@ def insert_to_preamble(document, text, index = 0): # Created from the reversed list to keep the first of alternative definitions. licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1]) -def put_cmd_in_ert(cmd): +def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False): """ Return ERT inset wrapping `cmd` as a list of strings. `cmd` can be a string or list of lines. Non-ASCII characters are converted - to the respective LICR macros if defined in unicodesymbols. + to the respective LICR macros if defined in unicodesymbols, + `is_open` is a boolean setting the inset status to "open", + `as_paragraph` wraps the ERT inset in a Standard paragraph. """ - ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""] - # It will be faster to work with a single string internally. + + status = {False:"collapsed", True:"open"} + ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "", + "\\begin_layout Plain Layout", "", + # content here ([5:5]) + "\\end_layout", "", "\\end_inset"] + + paragraph = ["\\begin_layout Standard", + # content here ([1:1]) + "", "", "\\end_layout", ""] + # ensure cmd is an unicode instance and make it "LyX safe". if isinstance(cmd, list): cmd = u"\n".join(cmd) - else: - cmd = u"%s" % cmd # ensure it is an unicode instance + elif sys.version_info[0] == 2 and isinstance(cmd, str): + cmd = cmd.decode('utf8') cmd = cmd.translate(licr_table) cmd = cmd.replace("\\", "\n\\backslash\n") - ret += cmd.splitlines() - ret += ["\\end_layout", "", "\\end_inset"] - return ret + + ert_inset[5:5] = cmd.splitlines() + if not as_paragraph: + return ert_inset + paragraph[1:1] = ert_inset + return paragraph def get_ert(lines, i, verbatim = False): @@ -562,69 +578,55 @@ def insert_document_option(document, option): "Insert _option_ as a document option." # Find \options in the header - options_line = find_token(document.header, "\\options", 0) - + i = find_token(document.header, "\\options", 0) # if the options does not exists add it after the textclass - if options_line == -1: - textclass_line = find_token(document.header, "\\textclass", 0) - document.header.insert(textclass_line +1, - r"\options %s" % option) + if i == -1: + i = find_token(document.header, "\\textclass", 0) + 1 + document.header.insert(i, r"\options %s" % option) return - - # add it to the end of the options - document.header[options_line] += ",%s" % option + # otherwise append to options + if not is_document_option(document, option): + document.header[i] += ",%s" % option def remove_document_option(document, option): - """ Remove _option_ as a document option. - - It is assumed that option belongs to the \options. - That can be done running is_document_option(document, option).""" + """ Remove _option_ as a document option.""" - options_line = find_token(document.header, "\\options", 0) - option_pos = document.header[options_line].find(option) + i = find_token(document.header, "\\options") + options = get_value(document.header, "\\options", i) + options = [op.strip() for op in options.split(',')] - # Remove option from \options - comma_before_pos = document.header[options_line].rfind(',', 0, option_pos) - comma_after_pos = document.header[options_line].find(',', option_pos) + # Remove `option` from \options + options = [op for op in options if op != option] - # if there are no commas then it is the single option - # and the options line should be removed since it will be empty - if comma_before_pos == comma_after_pos == -1: - del document.header[options_line] - return - - # last option - options = document.header[options_line] - if comma_after_pos == -1: - document.header[options_line] = options[:comma_before_pos].rsplit() - return - - document.header[options_line] = options[comma_before_pos: comma_after_pos] + if options: + document.header[i] = "\\options " + ','.join(options) + else: + del document.header[i] def is_document_option(document, option): "Find if _option_ is a document option" - # Find \options in the header - options_line = find_token(document.header, "\\options", 0) - - # \options is not present in the header - if options_line == -1: - return False + options = get_value(document.header, "\\options") + options = [op.strip() for op in options.split(',')] + return option in options - option_pos = document.header[options_line].find(option) - # option is not present in the \options - if option_pos == -1: - return False - return True +singlepar_insets = [s.strip() for s in + u"Argument, Caption Above, Caption Below, Caption Bicaption," + u"Caption Centered, Caption FigCaption, Caption Standard, Caption Table," + u"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides," + u"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup," + u"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File," + u"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle," + u"IPADeco, Index, Info, Phantom, Script".split(',')] +# print(singlepar_insets) - -def revert_language(document, lyxname, babelname, polyglossianame): +def revert_language(document, lyxname, babelname="", polyglossianame=""): " Revert native language support " - # Are we using polyglossia? + # Does the document use polyglossia? use_polyglossia = False if get_bool_value(document.header, "\\use_non_tex_fonts"): i = find_token(document.header, "\\language_package") @@ -632,7 +634,7 @@ def revert_language(document, lyxname, babelname, polyglossianame): document.warning("Malformed document! Missing \\language_package") else: pack = get_value(document.header, "\\language_package", i) - if pack == "default" or pack == "auto": + if pack in ("default", "auto"): use_polyglossia = True # Do we use this language with polyglossia? @@ -641,135 +643,149 @@ def revert_language(document, lyxname, babelname, polyglossianame): with_babel = with_polyglossia == False and babelname != "" # Are we dealing with a primary or secondary language? - primary = False + primary = document.language == lyxname secondary = False - orig_doc_language = document.language # Main language first - if document.language == lyxname: - primary = True + orig_doc_language = document.language + if primary: + # Change LyX document language to English (we will tell LaTeX + # to use the original language at the end of this function): document.language = "english" i = find_token(document.header, "\\language %s" % lyxname, 0) if i != -1: document.header[i] = "\\language english" - j = find_token(document.header, "\\language_package default", 0) - if j != -1: - document.header[j] = "\\language_package default" - if with_polyglossia: - add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame]) - document.body[2 : 2] = ["\\begin_layout Standard", - "\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "resetdefaultlanguage{%s}" % polyglossianame, - "\\end_layout", "", "\\end_inset", "", "", - "\\end_layout", ""] - - # Now secondary languages + + # Now look for occurences in the body i = 0 while True: - i = find_token(document.body, '\\lang', i) + i = find_token(document.body, "\\lang", i+1) if i == -1: break - if document.body[i].startswith('\\lang %s' % lyxname): + if document.body[i].startswith("\\lang %s" % lyxname): secondary = True - endlang = get_containing_layout(document.body, i)[2] - langswitch = find_token(document.body, '\\lang', i + 1, endlang) - startlayout = "\\begin_layout Standard" - endlayout = "\\end_layout" - if langswitch != -1: - endlang = langswitch - startlayout = "" - endlayout = "" - if with_polyglossia: - add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame]) - document.body[endlang : endlang] = [startlayout, - "\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "end{%s}" % polyglossianame, - "\\end_layout", "", "\\end_inset", "", "", - endlayout, ""] - elif with_babel: - document.body[endlang : endlang] = [startlayout, - "\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "end{otherlanguage}", - "\\end_layout", "", "\\end_inset", "", "", - endlayout, ""] - del document.body[i] - if with_polyglossia: - document.body[i : i] = ["\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "begin{%s}" % polyglossianame, - "\\end_layout", "", "\\end_inset", "", "", - ""] - elif with_babel: - document.body[i : i] = ["\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "begin{otherlanguage}{%s}" % babelname, - "\\end_layout", "", "\\end_inset", "", "", - ""] - elif primary and document.body[i].startswith('\\lang english'): + texname = use_polyglossia and polyglossianame or babelname + elif primary and document.body[i].startswith("\\lang english"): # Since we switched the main language manually, English parts need to be marked - endlang = get_containing_layout(document.body, i)[2] - langswitch = find_token(document.body, '\\lang', i + 1, endlang) - startlayout = "\\begin_layout Standard" - endlayout = "\\end_layout" - if langswitch != -1: - endlang = langswitch - startlayout = "" - endlayout = "" + texname = "english" + else: + continue + + parent = get_containing_layout(document.body, i) + i_e = parent[2] # end line no, + # print(i, texname, parent, document.body[i+1], file=sys.stderr) + + # Move leading space to the previous line: + if document.body[i+1].startswith(" "): + document.body[i+1] = document.body[i+1][1:] + document.body.insert(i, " ") + continue + + # TODO: handle nesting issues with font attributes, e.g. + # \begin_layout Standard + # + # \emph on + # \lang macedonian + # Македонски јазик + # \emph default + # — јужнословенски јазик, дел од групата на словенски јазици од јазичното + # семејство на индоевропски јазици. + # Македонскиот е службен и национален јазик во Македонија. + # \end_layout + + # Ensure correct handling of list labels + if (parent[0] in ["Labeling", "Description"] + and not " " in "\n".join(document.body[parent[3]:i])): + # line `i+1` is first line of a list item, + # part before a space character is the label + # TODO: insets or language change before first space character + labelline = document.body[i+1].split(' ', 1) + if len(labelline) > 1: + # Insert a space in the (original) document language + # between label and remainder. + # print(" Label:", labelline, file=sys.stderr) + lines = [labelline[0], + "\\lang %s" % orig_doc_language, + " ", + "\\lang %s" % (primary and "english" or lyxname), + labelline[1]] + document.body[i+1:i+2] = lines + i_e += 4 + + # Find out where to end the language change. + langswitch = i + while True: + langswitch = find_token(document.body, "\\lang", langswitch+1, i_e) + if langswitch == -1: + break + # print(" ", langswitch, document.body[langswitch], file=sys.stderr) + # skip insets + i_a = parent[3] # paragraph start line + container = get_containing_inset(document.body[i_a:i_e], langswitch-i_a) + if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a: + # print(" inset", container, file=sys.stderr) + continue + i_e = langswitch + break + + # use function or environment? + singlepar = i_e - i < 3 + if not singlepar and parent[0] == "Plain Layout": + # environment not allowed in some insets + container = get_containing_inset(document.body, i) + singlepar = container[0] in singlepar_insets + + # Delete empty language switches: + if not "".join(document.body[i+1:i_e]): + del document.body[i:i_e] + i -= 1 + continue + + if singlepar: if with_polyglossia: - parent = get_containing_layout(document.body, i) - document.body[endlang : endlang] = [startlayout, - "\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "end{english}", - "\\end_layout", "", "\\end_inset", "", "", - endlayout, ""] + begin_cmd = "\\text%s{"%texname elif with_babel: - parent = get_containing_layout(document.body, i) - document.body[endlang : endlang] = [startlayout, - "\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "end{otherlanguage}", - "\\end_layout", "", "\\end_inset", "", "", - endlayout, ""] - del document.body[i] + begin_cmd = "\\foreignlanguage{%s}{" % texname + end_cmd = "}" + else: if with_polyglossia: - document.body[i : i] = ["\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "begin{english}", - "\\end_layout", "", "\\end_inset", "", "", - ""] + begin_cmd = "\\begin{%s}"%texname + end_cmd = "\\end{%s}"%texname elif with_babel: - document.body[i : i] = ["\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "begin{otherlanguage}{english}", - "\\end_layout", "", "\\end_inset", "", "", - ""] - else: - i += 1 + begin_cmd = "\\begin{otherlanguage}{%s}" % texname + end_cmd = "\\end{otherlanguage}" + + if (not primary or texname == "english"): + try: + document.body[i_e:i_e] = put_cmd_in_ert(end_cmd) + document.body[i+1:i+1] = put_cmd_in_ert(begin_cmd) + except UnboundLocalError: + pass + del document.body[i] + + if not (primary or secondary): + return - # With babel, we need to add the language options - if with_babel and (primary or secondary): + # Make the language known to Babel/Polyglossia and ensure the correct + # document language: + doc_lang_switch = "" + if with_babel: + # add as global option insert_document_option(document, babelname) - if secondary and document.body[10] != "selectlanguage{%s}" % orig_doc_language: - # Since the user options are always placed after the babel options, - # we need to reset the main language - document.body[2 : 2] = ["\\begin_layout Standard", - "\\begin_inset ERT", "status open", "", - "\\begin_layout Plain Layout", "", "", - "\\backslash", - "selectlanguage{%s}" % orig_doc_language, - "\\end_layout", "", "\\end_inset", "", "", - "\\end_layout", ""] - + # Since user options are appended to the document options, + # Babel will treat `babelname` as primary language. + if not primary: + doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language + if with_polyglossia: + # Define language in the user preamble + # (don't use \AtBeginDocument, this fails with some languages). + add_to_preamble(document, ["\\usepackage{polyglossia}", + "\\setotherlanguage{%s}" % polyglossianame]) + if primary: + # Changing the main language must be done in the document body. + doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame + + # Reset LaTeX main language if required and not already done + if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]: + document.body[2:2] = put_cmd_in_ert(doc_lang_switch, + is_open=True, as_paragraph=True)