Use acadian rather than canadien

[lyx.git] / lib / lyx2lyx / lyx2lyx_tools.py
diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py

index 51b4ec4411a90262d8c23b8fb7f9a3db9fc2ed88..9c4fe0bb0bb0db2262babef1e187c5c64b205bb0 100644 (file)
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@@ -84,14 +84,16 @@ insert_document_option(document, option):
  remove_document_option(document, option):
    Remove _option_ as a document option.
  
  remove_document_option(document, option):
    Remove _option_ as a document option.
  
-revert_language(document, lyxname, babelname, polyglossianame):
+revert_language(document, lyxname, babelname="", polyglossianame=""):
    Reverts native language support to ERT
    If babelname or polyglossianame is empty, it is assumed
    this language package is not supported for the given language.
  '''
  
    Reverts native language support to ERT
    If babelname or polyglossianame is empty, it is assumed
    this language package is not supported for the given language.
  '''
  
-import re
-from parser_tools import find_token, find_end_of_inset, get_containing_layout, get_value, get_bool_value
+from __future__ import print_function
+import re, sys
+from parser_tools import (find_token, find_end_of_inset, get_containing_layout,
+                          get_containing_inset, get_value, get_bool_value)
  from unicode_symbols import unicode_reps
  
  # This will accept either a list of lines or a single line.
  from unicode_symbols import unicode_reps
  
  # This will accept either a list of lines or a single line.
@@ -145,24 +147,38 @@ def insert_to_preamble(document, text, index = 0):
  # Created from the reversed list to keep the first of alternative definitions.
  licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
  
  # Created from the reversed list to keep the first of alternative definitions.
  licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
  
-def put_cmd_in_ert(cmd):
+def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
      """
      Return ERT inset wrapping `cmd` as a list of strings.
  
      `cmd` can be a string or list of lines. Non-ASCII characters are converted
      """
      Return ERT inset wrapping `cmd` as a list of strings.
  
      `cmd` can be a string or list of lines. Non-ASCII characters are converted
-    to the respective LICR macros if defined in unicodesymbols.
+    to the respective LICR macros if defined in unicodesymbols.
+    `is_open` is a boolean setting the inset status to "open",
+    `as_paragraph` wraps the ERT inset in a Standard paragraph.
      """
      """
-    ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
-    # It will be faster to work with a single string internally.
+
+    status = {False:"collapsed", True:"open"}
+    ert_inset = ["\\begin_inset ERT", "status %s"%status[is_open], "",
+                 "\\begin_layout Plain Layout", "",
+                 # content here ([5:5])
+                 "\\end_layout", "", "\\end_inset"]
+
+    paragraph = ["\\begin_layout Standard",
+                 # content here ([1:1])
+                 "", "", "\\end_layout", ""]
+    # ensure cmd is a unicode instance and make it "LyX safe".
      if isinstance(cmd, list):
          cmd = u"\n".join(cmd)
      if isinstance(cmd, list):
          cmd = u"\n".join(cmd)
-    else:
-        cmd = u"%s" % cmd # ensure it is an unicode instance
+    elif sys.version_info[0] == 2 and isinstance(cmd, str):
+        cmd = cmd.decode('utf8')
      cmd = cmd.translate(licr_table)
      cmd = cmd.replace("\\", "\n\\backslash\n")
      cmd = cmd.translate(licr_table)
      cmd = cmd.replace("\\", "\n\\backslash\n")
-    ret += cmd.splitlines()
-    ret += ["\\end_layout", "", "\\end_inset"]
-    return ret
+
+    ert_inset[5:5] = cmd.splitlines()
+    if not as_paragraph:
+        return ert_inset
+    paragraph[1:1] = ert_inset
+    return paragraph
  
  
  def get_ert(lines, i, verbatim = False):
  
  
  def get_ert(lines, i, verbatim = False):
@@ -562,69 +578,55 @@ def insert_document_option(document, option):
      "Insert _option_ as a document option."
  
      # Find \options in the header
      "Insert _option_ as a document option."
  
      # Find \options in the header
-    options_line = find_token(document.header, "\\options", 0)
-
+    i = find_token(document.header, "\\options", 0)
      # if the options does not exists add it after the textclass
      # if the options does not exists add it after the textclass
-    if options_line == -1:
-        textclass_line = find_token(document.header, "\\textclass", 0)
-        document.header.insert(textclass_line +1,
-                               r"\options %s" % option)
+    if i == -1:
+        i = find_token(document.header, "\\textclass", 0) + 1
+        document.header.insert(i, r"\options %s" % option)
          return
          return
-
-    # add it to the end of the options
-    document.header[options_line] += ",%s" % option
+    # otherwise append to options
+    if not is_document_option(document, option):
+        document.header[i] += ",%s" % option
  
  
  def remove_document_option(document, option):
  
  
  def remove_document_option(document, option):
-    """ Remove _option_ as a document option.
+    """ Remove _option_ as a document option."""
  
  
-    It is assumed that option belongs to the \options.
-    That can be done running is_document_option(document, option)."""
+    i = find_token(document.header, "\\options")
+    options = get_value(document.header, "\\options", i)
+    options = [op.strip() for op in options.split(',')]
  
  
-    options_line = find_token(document.header, "\\options", 0)
-    option_pos = document.header[options_line].find(option)
+    # Remove `option` from \options
+    options = [op for op in options if op != option]
  
  
-    # Remove option from \options
-    comma_before_pos = document.header[options_line].rfind(',', 0, option_pos)
-    comma_after_pos  = document.header[options_line].find(',', option_pos)
-
-    # if there are no commas then it is the single option
-    # and the options line should be removed since it will be empty
-    if comma_before_pos == comma_after_pos == -1:
-        del document.header[options_line]
-        return
-
-    # last option
-    options = document.header[options_line]
-    if comma_after_pos == -1:
-        document.header[options_line] = options[:comma_before_pos].rsplit()
-        return
-
-    document.header[options_line] = options[comma_before_pos: comma_after_pos]
+    if options:
+        document.header[i] = "\\options " + ','.join(options)
+    else:
+        del document.header[i]
  
  
  def is_document_option(document, option):
      "Find if _option_ is a document option"
  
  
  
  def is_document_option(document, option):
      "Find if _option_ is a document option"
  
-    # Find \options in the header
-    options_line = find_token(document.header, "\\options", 0)
-
-    # \options is not present in the header
-    if options_line == -1:
-        return False
+    options = get_value(document.header, "\\options")
+    options = [op.strip() for op in options.split(',')]
+    return option in options
  
  
-    option_pos = document.header[options_line].find(option)
-    # option is not present in the \options
-    if option_pos == -1:
-        return False
  
  
-    return True
+singlepar_insets = [s.strip() for s in
+    u"Argument, Caption Above, Caption Below, Caption Bicaption,"
+    u"Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
+    u"Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
+    u"Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
+    u"Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
+    u"Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
+    u"IPADeco, Index, Info, Phantom, Script".split(',')]
+# print(singlepar_insets)
  
  
-
-def revert_language(document, lyxname, babelname, polyglossianame):
+def revert_language(document, lyxname, babelname="", polyglossianame=""):
      " Revert native language support "
  
      " Revert native language support "
  
-    # Are we using polyglossia?
+    # Does the document use polyglossia?
      use_polyglossia = False
      if get_bool_value(document.header, "\\use_non_tex_fonts"):
          i = find_token(document.header, "\\language_package")
      use_polyglossia = False
      if get_bool_value(document.header, "\\use_non_tex_fonts"):
          i = find_token(document.header, "\\language_package")
@@ -632,7 +634,7 @@ def revert_language(document, lyxname, babelname, polyglossianame):
              document.warning("Malformed document! Missing \\language_package")
          else:
              pack = get_value(document.header, "\\language_package", i)
              document.warning("Malformed document! Missing \\language_package")
          else:
              pack = get_value(document.header, "\\language_package", i)
-            if pack == "default" or pack == "auto":
+            if pack in ("default", "auto"):
                  use_polyglossia = True
  
      # Do we use this language with polyglossia?
                  use_polyglossia = True
  
      # Do we use this language with polyglossia?
@@ -641,120 +643,149 @@ def revert_language(document, lyxname, babelname, polyglossianame):
      with_babel = with_polyglossia == False and babelname != ""
  
      # Are we dealing with a primary or secondary language?
      with_babel = with_polyglossia == False and babelname != ""
  
      # Are we dealing with a primary or secondary language?
-    primary = False
+    primary = document.language == lyxname
      secondary = False
  
      # Main language first
      secondary = False
  
      # Main language first
-    if document.language == lyxname:
-        primary = True
+    orig_doc_language = document.language
+    if primary:
+        # Change LyX document language to English (we will tell LaTeX
+        # to use the original language at the end of this function):
          document.language = "english"
          i = find_token(document.header, "\\language %s" % lyxname, 0)
          if i != -1:
              document.header[i] = "\\language english"
          document.language = "english"
          i = find_token(document.header, "\\language %s" % lyxname, 0)
          if i != -1:
              document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        if with_polyglossia:
-            add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame])
-            document.body[2 : 2] = ["\\begin_layout Standard",
-                                    "\\begin_inset ERT", "status open", "",
-                                    "\\begin_layout Plain Layout", "", "",
-                                    "\\backslash",
-                                    "resetdefaultlanguage{%s}" % polyglossianame,
-                                    "\\end_layout", "", "\\end_inset", "", "",
-                                    "\\end_layout", ""]
-
-    # Now secondary languages
+
+    # Now look for occurrences in the body
      i = 0
      while True:
      i = 0
      while True:
-        i = find_token(document.body, '\\lang', i)
+        i = find_token(document.body, "\\lang", i+1)
          if i == -1:
          if i == -1:
-            return
-        if document.body[i].startswith('\\lang %s' % lyxname):
+            break
+        if document.body[i].startswith("\\lang %s" % lyxname):
              secondary = True
              secondary = True
-            if with_polyglossia:
-                add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame])
-                parent = get_containing_layout(document.body, i)
-                document.body[parent[2] : parent[2]] = ["\\begin_layout Standard",
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{%s}" % polyglossianame,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        "\\end_layout", ""]
-            elif with_babel:
-                parent = get_containing_layout(document.body, i)
-                document.body[parent[2] : parent[2]] = ["\\begin_layout Standard",
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{otherlanguage}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        "\\end_layout", ""]
-            del document.body[i]
-            if with_polyglossia:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{%s}" % polyglossianame,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
-            elif with_babel:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{otherlanguage}{%s}" % babelname,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
-        elif primary and document.body[i].startswith('\\lang english'):
+            texname = use_polyglossia and polyglossianame or babelname
+        elif primary and document.body[i].startswith("\\lang english"):
              # Since we switched the main language manually, English parts need to be marked
              # Since we switched the main language manually, English parts need to be marked
+            texname = "english"
+        else:
+            continue
+
+        parent = get_containing_layout(document.body, i)
+        i_e = parent[2] # end line no,
+        # print(i, texname, parent, document.body[i+1], file=sys.stderr)
+        
+        # Move leading space to the previous line:
+        if document.body[i+1].startswith(" "):
+            document.body[i+1] = document.body[i+1][1:]
+            document.body.insert(i, " ")
+            continue
+        
+        # TODO: handle nesting issues with font attributes, e.g.
+        # \begin_layout Standard
+        # 
+        # \emph on
+        # \lang macedonian
+        # Македонски јазик
+        # \emph default
+        #  — јужнословенски јазик, дел од групата на словенски јазици од јазичното
+        #  семејство на индоевропски јазици.
+        #  Македонскиот е службен и национален јазик во Македонија.
+        # \end_layout
+        
+        # Ensure correct handling of list labels
+        if (parent[0] in ["Labeling", "Description"]
+            and not " " in "\n".join(document.body[parent[3]:i])):
+            # line `i+1` is first line of a list item,
+            # part before a space character is the label
+            # TODO: insets or language change before first space character
+            labelline = document.body[i+1].split(' ', 1)
+            if len(labelline) > 1:
+                # Insert a space in the (original) document language
+                # between label and remainder.
+                # print("  Label:", labelline, file=sys.stderr)
+                lines = [labelline[0],
+                    "\\lang %s" % orig_doc_language,
+                    " ",
+                    "\\lang %s" % (primary and "english" or lyxname),
+                    labelline[1]]
+                document.body[i+1:i+2] = lines
+                i_e += 4
+  
+        # Find out where to end the language change.
+        langswitch = i
+        while True:
+            langswitch = find_token(document.body, "\\lang", langswitch+1, i_e)
+            if langswitch == -1:
+                break
+            # print("  ", langswitch, document.body[langswitch], file=sys.stderr)
+            # skip insets
+            i_a = parent[3] # paragraph start line
+            container = get_containing_inset(document.body[i_a:i_e], langswitch-i_a)
+            if container and container[1] < langswitch-i_a and container[2] > langswitch-i_a:
+                # print("  inset", container, file=sys.stderr)
+                continue
+            i_e = langswitch
+            break
+        
+        # use function or environment?
+        singlepar = i_e - i < 3
+        if not singlepar and parent[0] == "Plain Layout":
+            # environment not allowed in some insets
+            container = get_containing_inset(document.body, i)
+            singlepar = container[0] in singlepar_insets
+            
+        # Delete empty language switches:
+        if not "".join(document.body[i+1:i_e]):
+            del document.body[i:i_e]
+            i -= 1
+            continue
+
+        if singlepar:
              if with_polyglossia:
              if with_polyglossia:
-                parent = get_containing_layout(document.body, i)
-                document.body[parent[2] : parent[2]] = ["\\begin_layout Standard",
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{english}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        "\\end_layout", ""]
+                begin_cmd = "\\text%s{"%texname
              elif with_babel:
              elif with_babel:
-                parent = get_containing_layout(document.body, i)
-                document.body[parent[2] : parent[2]] = ["\\begin_layout Standard",
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "end{otherlanguage}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        "\\end_layout", ""]
-            del document.body[i]
+                begin_cmd = "\\foreignlanguage{%s}{" % texname
+            end_cmd = "}"
+        else:
              if with_polyglossia:
              if with_polyglossia:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{english}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
+                begin_cmd = "\\begin{%s}"%texname
+                end_cmd = "\\end{%s}"%texname
              elif with_babel:
              elif with_babel:
-                document.body[i : i] = ["\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "begin{otherlanguage}{english}",
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        ""]
-        else:
-            i += 1
-
-        # With babel, we need to add the language options
-        if with_babel and (primary or secondary):
-            insert_document_option(document, babelname)
-            if secondary:
-                # Since the user options are always placed after the babel options,
-                # we need to reset the main language
-                document.body[2 : 2] = ["\\begin_layout Standard",
-                                        "\\begin_inset ERT", "status open", "",
-                                        "\\begin_layout Plain Layout", "", "",
-                                        "\\backslash",
-                                        "selectlanguage{%s}" % document.language,
-                                        "\\end_layout", "", "\\end_inset", "", "",
-                                        "\\end_layout", ""]
+                begin_cmd = "\\begin{otherlanguage}{%s}" % texname
+                end_cmd = "\\end{otherlanguage}"
+
+        if (not primary or texname == "english"):
+            try:
+                document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
+                document.body[i+1:i+1] = put_cmd_in_ert(begin_cmd)
+            except UnboundLocalError:
+                pass
+        del document.body[i]
+
+    if not (primary or secondary):
+        return
  
  
+    # Make the language known to Babel/Polyglossia and ensure the correct
+    # document language:
+    doc_lang_switch = ""
+    if with_babel:
+        # add as global option
+        insert_document_option(document, babelname)
+        # Since user options are appended to the document options,
+        # Babel will treat `babelname` as primary language.
+        if not primary:
+            doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
+    if with_polyglossia:
+        # Define language in the user preamble
+        # (don't use \AtBeginDocument, this fails with some languages).
+        add_to_preamble(document, ["\\usepackage{polyglossia}",
+                                   "\\setotherlanguage{%s}" % polyglossianame])
+        if primary:
+            # Changing the main language must be done in the document body.
+            doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame
+
+    # Reset LaTeX main language if required and not already done
+    if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
+        document.body[2:2] = put_cmd_in_ert(doc_lang_switch,
+                                            is_open=True, as_paragraph=True)