]> git.lyx.org Git - lyx.git/blobdiff - lib/lyx2lyx/lyx2lyx_tools.py
Use acadian rather than canadien
[lyx.git] / lib / lyx2lyx / lyx2lyx_tools.py
index 6c0f5aa819e6ad7036eb72d3b10dd6b042a4aef5..9c4fe0bb0bb0db2262babef1e187c5c64b205bb0 100644 (file)
@@ -84,14 +84,16 @@ insert_document_option(document, option):
 remove_document_option(document, option):
   Remove _option_ as a document option.
 
-revert_language(document, lyxname, babelname, polyglossianame):
+revert_language(document, lyxname, babelname="", polyglossianame=""):
   Reverts native language support to ERT
   If babelname or polyglossianame is empty, it is assumed
   this language package is not supported for the given language.
 '''
 
-import re
-from parser_tools import find_token, find_end_of_inset, get_containing_layout, get_value, get_bool_value
+from __future__ import print_function
+import re, sys
+from parser_tools import (find_token, find_end_of_inset, get_containing_layout,
+                          get_containing_inset, get_value, get_bool_value)
 from unicode_symbols import unicode_reps
 
 # This will accept either a list of lines or a single line.
@@ -145,24 +147,38 @@ def insert_to_preamble(document, text, index = 0):
 # Created from the reversed list to keep the first of alternative definitions.
 licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
 
-def put_cmd_in_ert(cmd):
+def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
     """
     Return ERT inset wrapping `cmd` as a list of strings.
 
     `cmd` can be a string or list of lines. Non-ASCII characters are converted
-    to the respective LICR macros if defined in unicodesymbols.
+    to the respective LICR macros if defined in unicodesymbols.
+    `is_open` is a boolean setting the inset status to "open";
+    `as_paragraph` wraps the ERT inset in a Standard paragraph.
     """
-    ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
-    # It will be faster to work with a single string internally.
+
+    status = {False:"collapsed", True:"open"}
+    ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "",
+                 "\\begin_layout Plain Layout", "",
+                 # content here ([5:5])
+                 "\\end_layout", "", "\\end_inset"]
+
+    paragraph = ["\\begin_layout Standard",
+                 # content here ([1:1])
+                 "", "", "\\end_layout", ""]
+    # ensure cmd is a unicode instance and make it "LyX safe".
     if isinstance(cmd, list):
         cmd = u"\n".join(cmd)
-    else:
-        cmd = u"%s" % cmd # ensure it is an unicode instance
+    elif sys.version_info[0] == 2 and isinstance(cmd, str):
+        cmd = cmd.decode('utf8')
     cmd = cmd.translate(licr_table)
     cmd = cmd.replace("\\", "\n\\backslash\n")
-    ret += cmd.splitlines()
-    ret += ["\\end_layout", "", "\\end_inset"]
-    return ret
+
+    ert_inset[5:5] = cmd.splitlines()
+    if not as_paragraph:
+        return ert_inset
+    paragraph[1:1] = ert_inset
+    return paragraph
 
 
 def get_ert(lines, i, verbatim = False):
@@ -562,69 +578,55 @@ def insert_document_option(document, option):
     "Insert _option_ as a document option."
 
     # Find \options in the header
-    options_line = find_token(document.header, "\\options", 0)
-
+    i = find_token(document.header, "\\options", 0)
     # if the options does not exists add it after the textclass
-    if options_line == -1:
-        textclass_line = find_token(document.header, "\\textclass", 0)
-        document.header.insert(textclass_line +1,
-                               r"\options %s" % option)
+    if i == -1:
+        i = find_token(document.header, "\\textclass", 0) + 1
+        document.header.insert(i, r"\options %s" % option)
         return
-
-    # add it to the end of the options
-    document.header[options_line] += ",%s" % option
+    # otherwise append to options
+    if not is_document_option(document, option):
+        document.header[i] += ",%s" % option
 
 
 def remove_document_option(document, option):
-    """ Remove _option_ as a document option.
-
-    It is assumed that option belongs to the \options.
-    That can be done running is_document_option(document, option)."""
+    """ Remove _option_ as a document option."""
 
-    options_line = find_token(document.header, "\\options", 0)
-    option_pos = document.header[options_line].find(option)
+    i = find_token(document.header, "\\options")
+    options = get_value(document.header, "\\options", i)
+    options = [op.strip() for op in options.split(',')]
 
-    # Remove option from \options
-    comma_before_pos = document.header[options_line].rfind(',', 0, option_pos)
-    comma_after_pos  = document.header[options_line].find(',', option_pos)
+    # Remove `option` from \options
+    options = [op for op in options if op != option]
 
-    # if there are no commas then it is the single option
-    # and the options line should be removed since it will be empty
-    if comma_before_pos == comma_after_pos == -1:
-        del document.header[options_line]
-        return
-
-    # last option
-    options = document.header[options_line]
-    if comma_after_pos == -1:
-        document.header[options_line] = options[:comma_before_pos].rsplit()
-        return
-
-    document.header[options_line] = options[comma_before_pos: comma_after_pos]
+    if options:
+        document.header[i] = "\\options " + ','.join(options)
+    else:
+        del document.header[i]
 
 
 def is_document_option(document, option):
     "Find if _option_ is a document option"
 
-    # Find \options in the header
-    options_line = find_token(document.header, "\\options", 0)
-
-    # \options is not present in the header
-    if options_line == -1:
-        return False
-
-    option_pos = document.header[options_line].find(option)
-    # option is not present in the \options
-    if option_pos == -1:
-        return False
+    options = get_value(document.header, "\\options")
+    options = [op.strip() for op in options.split(',')]
+    return option in options
 
-    return True
 
+singlepar_insets = [s.strip() for s in
+    u"Argument, Caption Above, Caption Below, Caption Bicaption,"
+    u"Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
+    u"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
+    u"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
+    u"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
+    u"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
+    u"IPADeco, Index, Info, Phantom, Script".split(',')]
+# print(singlepar_insets)
 
-def revert_language(document, lyxname, babelname, polyglossianame):
+def revert_language(document, lyxname, babelname="", polyglossianame=""):
     " Revert native language support "
 
-    # Are we using polyglossia?
+    # Does the document use polyglossia?
     use_polyglossia = False
     if get_bool_value(document.header, "\\use_non_tex_fonts"):
         i = find_token(document.header, "\\language_package")
@@ -632,7 +634,7 @@ def revert_language(document, lyxname, babelname, polyglossianame):
             document.warning("Malformed document! Missing \\language_package")
         else:
             pack = get_value(document.header, "\\language_package", i)
-            if pack == "default" or pack == "auto":
+            if pack in ("default", "auto"):
                 use_polyglossia = True
 
     # Do we use this language with polyglossia?
@@ -641,134 +643,149 @@ def revert_language(document, lyxname, babelname, polyglossianame):
     with_babel = with_polyglossia == False and babelname != ""
 
     # Are we dealing with a primary or secondary language?
-    primary = False
+    primary = document.language == lyxname
     secondary = False
 
     # Main language first
-    if document.language == lyxname:
-        primary = True
+    orig_doc_language = document.language
+    if primary:
+        # Change LyX document language to English (we will tell LaTeX
+        # to use the original language at the end of this function):
         document.language = "english"
         i = find_token(document.header, "\\language %s" % lyxname, 0)
         if i != -1:
             document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        if with_polyglossia:
-            add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame])
-            document.body[2 : 2] = ["\\begin_layout Standard",
-                                    "\\begin_inset ERT", "status open", "",
-                                    "\\begin_layout Plain Layout", "", "",
-                                    "\\backslash",
-                                    "resetdefaultlanguage{%s}" % polyglossianame,
-                                    "\\end_layout", "", "\\end_inset", "", "",
-                                    "\\end_layout", ""]
-
-    # Now secondary languages
+
+    # Now look for occurrences in the body
     i = 0
     while True:
-        i = find_token(document.body, '\\lang', i)
+        i = find_token(document.body, "\\lang", i+1)
         if i == -1:
             break
-        if document.body[i].startswith('\\lang %s' % lyxname):
+        if document.body[i].startswith("\\lang %s" % lyxname):
             secondary = True
-            endlang = get_containing_layout(document.body, i)[2]
-            langswitch = find_token(document.body, '\\lang', i + 1, endlang)
-            startlayout = "\\begin_layout Standard"
-            endlayout = "\\end_layout"
-            if langswitch != -1:
-                endlang = langswitch
-                startlayout = ""
-                endlayout = ""
-            if with_polyglossia:
-                add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame])     
-                document.body[endlang : endlang] = [startlayout,
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{%s}" % polyglossianame,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        endlayout, ""]
-            elif with_babel:
-                document.body[endlang : endlang] = [startlayout,
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{otherlanguage}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        endlayout, ""]
-            del document.body[i]
-            if with_polyglossia:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{%s}" % polyglossianame,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
-            elif with_babel:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{otherlanguage}{%s}" % babelname,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
-        elif primary and document.body[i].startswith('\\lang english'):
+            texname = use_polyglossia and polyglossianame or babelname
+        elif primary and document.body[i].startswith("\\lang english"):
             # Since we switched the main language manually, English parts need to be marked
-            endlang = get_containing_layout(document.body, i)[2]
-            langswitch = find_token(document.body, '\\lang', i + 1, endlang)
-            startlayout = "\\begin_layout Standard"
-            endlayout = "\\end_layout"
-            if langswitch != -1:
-                endlang = langswitch
-                startlayout = ""
-                endlayout = ""
+            texname = "english"
+        else:
+            continue
+
+        parent = get_containing_layout(document.body, i)
+        i_e = parent[2] # end line no,
+        # print(i, texname, parent, document.body[i+1], file=sys.stderr)
+        
+        # Move leading space to the previous line:
+        if document.body[i+1].startswith(" "):
+            document.body[i+1] = document.body[i+1][1:]
+            document.body.insert(i, " ")
+            continue
+        
+        # TODO: handle nesting issues with font attributes, e.g.
+        # \begin_layout Standard
+        # 
+        # \emph on
+        # \lang macedonian
+        # Македонски јазик
+        # \emph default
+        #  — јужнословенски јазик, дел од групата на словенски јазици од јазичното
+        #  семејство на индоевропски јазици.
+        #  Македонскиот е службен и национален јазик во Македонија.
+        # \end_layout
+        
+        # Ensure correct handling of list labels
+        if (parent[0] in ["Labeling", "Description"]
+            and not " " in "\n".join(document.body[parent[3]:i])):
+            # line `i+1` is first line of a list item,
+            # part before a space character is the label
+            # TODO: insets or language change before first space character
+            labelline = document.body[i+1].split(' ', 1)
+            if len(labelline) > 1:
+                # Insert a space in the (original) document language
+                # between label and remainder.
+                # print("  Label:", labelline, file=sys.stderr)
+                lines = [labelline[0],
+                    "\\lang %s" % orig_doc_language,
+                    " ",
+                    "\\lang %s" % (primary and "english" or lyxname),
+                    labelline[1]]
+                document.body[i+1:i+2] = lines
+                i_e += 4
+  
+        # Find out where to end the language change.
+        langswitch = i
+        while True:
+            langswitch = find_token(document.body, "\\lang", langswitch+1, i_e)
+            if langswitch == -1:
+                break
+            # print("  ", langswitch, document.body[langswitch], file=sys.stderr)
+            # skip insets
+            i_a = parent[3] # paragraph start line
+            container = get_containing_inset(document.body[i_a:i_e], langswitch-i_a)
+            if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a:
+                # print("  inset", container, file=sys.stderr)
+                continue
+            i_e = langswitch
+            break
+        
+        # use function or environment?
+        singlepar = i_e - i < 3
+        if not singlepar and parent[0] == "Plain Layout":
+            # environment not allowed in some insets
+            container = get_containing_inset(document.body, i)
+            singlepar = container[0] in singlepar_insets
+            
+        # Delete empty language switches:
+        if not "".join(document.body[i+1:i_e]):
+            del document.body[i:i_e]
+            i -= 1
+            continue
+
+        if singlepar:
             if with_polyglossia:
-                parent = get_containing_layout(document.body, i)
-                document.body[endlang : endlang] = [startlayout,
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{english}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        endlayout, ""]
+                begin_cmd = "\\text%s{"%texname
             elif with_babel:
-                parent = get_containing_layout(document.body, i)
-                document.body[endlang : endlang] = [startlayout,
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{otherlanguage}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        endlayout, ""]
-            del document.body[i]
+                begin_cmd = "\\foreignlanguage{%s}{" % texname
+            end_cmd = "}"
+        else:
             if with_polyglossia:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{english}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
+                begin_cmd = "\\begin{%s}"%texname
+                end_cmd = "\\end{%s}"%texname
             elif with_babel:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{otherlanguage}{english}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
-        else:
-            i += 1
+                begin_cmd = "\\begin{otherlanguage}{%s}" % texname
+                end_cmd = "\\end{otherlanguage}"
+
+        if (not primary or texname == "english"):
+            try:
+                document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
+                document.body[i+1:i+1] = put_cmd_in_ert(begin_cmd)
+            except UnboundLocalError:
+                pass
+        del document.body[i]
+
+    if not (primary or secondary):
+        return
 
-    # With babel, we need to add the language options
-    if with_babel and (primary or secondary):
+    # Make the language known to Babel/Polyglossia and ensure the correct
+    # document language:
+    doc_lang_switch = ""
+    if with_babel:
+        # add as global option
         insert_document_option(document, babelname)
-        if secondary and document.body[10] != "selectlanguage{%s}" % document.language:
-            # Since the user options are always placed after the babel options,
-            # we need to reset the main language
-            document.body[2 : 2] = ["\\begin_layout Standard",
-                                    "\\begin_inset ERT", "status open", "",
-                                    "\\begin_layout Plain Layout", "", "",
-                                    "\\backslash",
-                                    "selectlanguage{%s}" % document.language,
-                                    "\\end_layout", "", "\\end_inset", "", "",
-                                    "\\end_layout", ""]
-
+        # Since user options are appended to the document options,
+        # Babel will treat `babelname` as primary language.
+        if not primary:
+            doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
+    if with_polyglossia:
+        # Define language in the user preamble
+        # (don't use \AtBeginDocument, this fails with some languages).
+        add_to_preamble(document, ["\\usepackage{polyglossia}",
+                                   "\\setotherlanguage{%s}" % polyglossianame])
+        if primary:
+            # Changing the main language must be done in the document body.
+            doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame
+
+    # Reset LaTeX main language if required and not already done
+    if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
+        document.body[2:2] = put_cmd_in_ert(doc_lang_switch,
+                                            is_open=True, as_paragraph=True)