revert_language: fix document language assignment

[lyx.git] / lib / lyx2lyx / lyx2lyx_tools.py
diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py

index 5c1f3cb427471ce960404dc741a68f454bb051c5..75a5c820def0b75b958f5b13505e2780ff08fcec 100644 (file)
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@@ -17,8 +17,8 @@
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  
  '''
-This module offers several free functions to help with lyx2lyx'ing. 
-More documentaton is below, but here is a quick guide to what 
+This module offers several free functions to help with lyx2lyx'ing.
+More documentation is below, but here is a quick guide to what
  they do. Optional arguments are marked by brackets.
  
  add_to_preamble(document, text):
@@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]):
    default index is 0, so the material is inserted at the beginning.
    Prepends a comment "% Added by lyx2lyx" to text.
  
-put_cmd_in_ert(arg):
-  Here arg should be a list of strings (lines), which we want to
+put_cmd_in_ert(cmd):
+  Here cmd should be a list of strings (lines), which we want to
    wrap in ERT. Returns a list of strings so wrapped.
    A call to this routine will often go something like this:
      i = find_token('\\begin_inset FunkyInset', ...)
@@ -47,23 +47,53 @@ put_cmd_in_ert(arg):
      ert = put_cmd_in_ert(content)
      document.body[i:j+1] = ert
  
+get_ert(lines, i[, verbatim]):
+  Here, lines is a list of lines of LyX material containing an ERT inset,
+  whose content we want to convert to LaTeX. The ERT starts at index i.
+  If the optional (by default: False) bool verbatim is True, the content
+  of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
+  for the use in verbatim insets.
+
  lyx2latex(document, lines):
-  Here, lines is a list of lines of LyX material we want to convert 
+  Here, lines is a list of lines of LyX material we want to convert
    to LaTeX. We do the best we can and return a string containing
    the translated material.
  
+lyx2verbatim(document, lines):
+  Here, lines is a list of lines of LyX material we want to convert
+  to verbatim material (used in ERT and the like). We do the best we
+  can and return a string containing the translated material.
+
  latex_length(slen):
-    Convert lengths (in LyX form) to their LaTeX representation. Returns 
-    (bool, length), where the bool tells us if it was a percentage, and 
-    the length is the LaTeX representation.
+  Convert lengths (in LyX form) to their LaTeX representation. Returns
+  (bool, length), where the bool tells us if it was a percentage, and
+  the length is the LaTeX representation.
+
+convert_info_insets(document, type, func):
+  Applies func to the argument of all info insets matching certain types
+  type : the type to match. This can be a regular expression.
+  func : function from string to string to apply to the "arg" field of
+         the info insets.
+
+is_document_option(document, option):
+  Find if _option_ is a document option (\\options in the header).
  
+insert_document_option(document, option):
+  Insert _option_ as a document option.
+
+remove_document_option(document, option):
+  Remove _option_ as a document option.
+
+revert_language(document, lyxname, babelname, polyglossianame):
+  Reverts native language support to ERT
+  If babelname or polyglossianame is empty, it is assumed
+  this language package is not supported for the given language.
  '''
  
-import string
-from parser_tools import find_token, find_end_of_inset
+import re
+from parser_tools import find_token, find_end_of_inset, get_containing_layout, get_value, get_bool_value
  from unicode_symbols import unicode_reps
  
-
  # This will accept either a list of lines or a single line.
  # It is bad practice to pass something with embedded newlines,
  # though we will handle that.
@@ -100,39 +130,42 @@ def add_to_preamble(document, text):
  # It should really be a list.
  def insert_to_preamble(document, text, index = 0):
      """ Insert text to the preamble at a given line"""
-    
+
      if not type(text) is list:
        # split on \n just in case
        # it'll give us the one element list we want
        # if there's no \n, too
        text = text.split('\n')
-    
+
      text.insert(0, "% Added by lyx2lyx")
      document.preamble[index:index] = text
  
  
-def put_cmd_in_ert(arg):
-    '''
-    arg should be a list of lines we want to wrap in ERT.
-    Returns a list of strings, with the lines so wrapped.
-    '''
-    
+# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method.
+# Keys are code points (ord() of the character), values are the replacement commands.
+# Created from the reversed list to keep the first of alternative definitions:
+# a later dict entry overwrites an earlier one, so after reversing, the entry
+# that comes first in unicode_reps is the one that survives.
+licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
+
+def put_cmd_in_ert(cmd):
+    """
+    Return ERT inset wrapping `cmd` as a list of strings.
+
+    `cmd` can be a string or list of lines. Non-ASCII characters are converted
+    to the respective LICR macros if defined in unicodesymbols.
+    """
      ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
-    # It will be faster for us to work with a single string internally. 
-    # That way, we only go through the unicode_reps loop once.
-    if type(arg) is list:
-      s = "\n".join(arg)
+    # It will be faster to work with a single string internally.
+    if isinstance(cmd, list):
+        cmd = u"\n".join(cmd)
      else:
-      s = arg
-    for rep in unicode_reps:
-      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
-    s = s.replace('\\', "\\backslash\n")
-    ret += s.splitlines()
+        cmd = u"%s" % cmd # ensure it is an unicode instance
+    cmd = cmd.translate(licr_table)
+    cmd = cmd.replace("\\", "\n\\backslash\n")
+    ret += cmd.splitlines()
      ret += ["\\end_layout", "", "\\end_inset"]
      return ret
  
  
-def get_ert(lines, i):
+def get_ert(lines, i, verbatim = False):
      'Convert an ERT inset into LaTeX.'
      if not lines[i].startswith("\\begin_inset ERT"):
          return ""
@@ -156,7 +189,10 @@ def get_ert(lines, i):
              while i + 1 < j and lines[i+1] == "":
                  i = i + 1
          elif lines[i] == "\\backslash":
-            ret = ret + "\\"
+            if verbatim:
+                ret = ret + "\n" + lines[i] + "\n"
+            else:
+                ret = ret + "\\"
          else:
              ret = ret + lines[i]
          i = i + 1
@@ -254,7 +290,7 @@ def lyx2latex(document, lines):
  
            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
-            line = line.replace(rep[1], rep[0] + "{}")
+              line = line.replace(rep[1], rep[0])
            line = line.replace(r'\backslash', r'\textbackslash{}')
            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
@@ -269,8 +305,17 @@ def lyx2latex(document, lines):
      return content
  
  
+def lyx2verbatim(document, lines):
+    """
+    Convert some LyX stuff into corresponding verbatim stuff, as best we can.
+
+    `lines` is first translated with lyx2latex(). Every backslash of the
+    result that does not already start the word "backslash" is then replaced
+    by a "\\backslash" token on a line of its own, the same form that
+    put_cmd_in_ert() produces. Returns the resulting string.
+    """
+    latex = lyx2latex(document, lines)
+    return re.sub(r'\\(?!backslash)', r'\n\\backslash\n', latex)
+
+
  def latex_length(slen):
-    ''' 
+    '''
      Convert lengths to their LaTeX representation. Returns (bool, length),
      where the bool tells us if it was a percentage, and the length is the
      LaTeX representation.
@@ -284,9 +329,14 @@ def latex_length(slen):
      # the + always precedes the -
  
      # Convert relative lengths to LaTeX units
-    units = {"text%":"\\textwidth", "col%":"\\columnwidth",
-             "page%":"\\paperwidth", "line%":"\\linewidth",
-             "theight%":"\\textheight", "pheight%":"\\paperheight"}
+    units = {"col%": "\\columnwidth",
+             "text%": "\\textwidth",
+             "page%": "\\paperwidth",
+             "line%": "\\linewidth",
+             "theight%": "\\textheight",
+             "pheight%": "\\paperheight",
+             "baselineskip%": "\\baselineskip"
+            }
      for unit in list(units.keys()):
          i = slen.find(unit)
          if i == -1:
@@ -323,6 +373,44 @@ def latex_length(slen):
      return (percent, slen)
  
  
+def length_in_bp(length, document = None):
+    """
+    Convert a length in LyX format to its value in bp units.
+
+    `length` is a string made of a number directly followed by a unit,
+    e.g. "2.5cm" or "50text%". Relative units are resolved with fixed
+    assumptions: 10pt font size, A4 paper with default margins, one column.
+
+    Returns the converted value as a string formatted with "%g".
+    If `length` cannot be parsed, a warning is issued and 0 is returned.
+    If the unit is unknown, a warning is issued and the numeric part is
+    returned unchanged (as a string).
+
+    Warnings go to document.warning() if the optional `document` is given,
+    otherwise to stderr.
+    """
+
+    def warn(message):
+        # `document` is optional because callers pass the length only;
+        # without it there is no document.warning() to report to.
+        if document is not None:
+            document.warning(message)
+        else:
+            import sys
+            sys.stderr.write("Warning: %s\n" % message)
+
+    em_width = 10.0 / 72.27 # assume 10pt font size
+    text_width = 8.27 / 1.7 # assume A4 with default margins
+    # scale factors are taken from Length::inInch()
+    scales = {"bp"       : 1.0,
+              "cc"       : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
+              "cm"       : (72.0 / 2.54),
+              "dd"       : (72.0 / (72.27 / (0.376 * 2.845))),
+              "em"       : (72.0 * em_width),
+              "ex"       : (72.0 * em_width * 0.4305),
+              "in"       : 72.0,
+              "mm"       : (72.0 / 25.4),
+              "mu"       : (72.0 * em_width / 18.0),
+              "pc"       : (72.0 / (72.27 / 12.0)),
+              "pt"       : (72.0 / (72.27)),
+              "sp"       : (72.0 / (72.27 * 65536.0)),
+              "text%"    : (72.0 * text_width / 100.0),
+              "col%"     : (72.0 * text_width / 100.0), # assume 1 column
+              "page%"    : (72.0 * text_width * 1.7 / 100.0),
+              "line%"    : (72.0 * text_width / 100.0),
+              "theight%" : (72.0 * text_width * 1.787 / 100.0),
+              "pheight%" : (72.0 * text_width * 2.2 / 100.0)}
+
+    m = re.match(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$', length)
+    if not m:
+        warn("Invalid length value: " + length + ".")
+        return 0
+    value, unit = m.groups()
+    if unit not in scales:
+        warn("Unknown length unit: " + unit + ".")
+        return value
+    try:
+        number = float(value)
+    except ValueError:
+        # the pattern accepts any run of non-letters, e.g. "1..2" or "-"
+        warn("Invalid length value: " + length + ".")
+        return 0
+    return "%g" % (number * scales[unit])
+
+
  def revert_flex_inset(lines, name, LaTeXname):
    " Convert flex insets to TeX code "
    i = 0
@@ -382,7 +470,7 @@ def revert_font_attrs(lines, name, LaTeXname):
    while True:
      i = find_token(lines, name + ' on', i)
      if i == -1:
-      return changed
+      break
      j = find_token(lines, name + ' default', i)
      k = find_token(lines, name + ' on', i + 1)
      # if there is no default set, the style ends with the layout
@@ -396,6 +484,16 @@ def revert_font_attrs(lines, name, LaTeXname):
      changed = True
      i += 1
  
+  # now delete all remaining lines that manipulate this attribute
+  i = 0
+  while True:
+    i = find_token(lines, name, i)
+    if i == -1:
+      break
+    del lines[i]
+
+  return changed
+
  
  def revert_layout_command(lines, name, LaTeXname):
    " Reverts a command from a layout to TeX code "
@@ -440,3 +538,238 @@ def str2bool(s):
    "'true' goes to True, case-insensitively, and we strip whitespace."
    s = s.strip().lower()
    return s == "true"
+
+
+def convert_info_insets(document, type, func):
+    """
+    Convert info insets matching type using func.
+
+    document : the document whose body is searched and modified in place.
+    type     : the info type to match. It is inserted into a regular
+               expression, so it may itself be a regular expression.
+    func     : function from string to string, applied to the content of
+               the "arg" field of each matching inset.
+
+    The "type" line is expected directly after "\\begin_inset Info" and the
+    "arg" line directly after that; insets that do not follow this layout
+    are left untouched.
+    """
+    type_re = re.compile(r'^type\s+"(%s)"$' % type)
+    arg_re = re.compile(r'^arg\s+"(.*)"$')
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Info", i)
+        if i == -1:
+            return
+        if i + 2 >= len(document.body):
+            # truncated inset at the very end of the body: there is no room
+            # for both a type and an arg line, so nothing can match
+            return
+        if type_re.match(document.body[i + 1]):
+            arg = arg_re.match(document.body[i + 2])
+            if arg:
+                document.body[i + 2] = 'arg   "%s"' % func(arg.group(1))
+        # skip the inset header, the type line and the arg line
+        i += 3
+
+
+def insert_document_option(document, option):
+    """
+    Insert _option_ as a document option.
+
+    The option is appended to the \\options line of the header. If there is
+    no such line yet, one is created right after the \\textclass line.
+    An option that is already listed is not added a second time.
+    """
+
+    # Find \options in the header
+    options_line = find_token(document.header, "\\options", 0)
+
+    # if \options does not exist, add it after the textclass
+    if options_line == -1:
+        # NOTE(review): if \textclass is missing as well, find_token returns
+        # -1 and the new line ends up at the top of the header.
+        textclass_line = find_token(document.header, "\\textclass", 0)
+        document.header.insert(textclass_line + 1,
+                               r"\options %s" % option)
+        return
+
+    # everything after the "\options" token is the comma separated list
+    parts = document.header[options_line].split(None, 1)
+    listed = parts[1] if len(parts) > 1 else ""
+
+    # an empty \options line must not get a leading comma
+    if not listed.strip():
+        document.header[options_line] = r"\options %s" % option
+        return
+
+    # do not list the same option twice
+    if option in [opt.strip() for opt in listed.split(',')]:
+        return
+
+    # add it to the end of the options
+    document.header[options_line] += ",%s" % option
+
+
+def remove_document_option(document, option):
+    r""" Remove _option_ as a document option.
+
+    _option_ is removed from the comma separated list on the \options line
+    of the header; the other options are kept in their order. When no
+    option is left, the whole \options line is deleted.
+
+    Only whole options are matched. If the header has no \options line or
+    the option is not listed, nothing is changed (this can be checked
+    beforehand with is_document_option(document, option)).
+
+    NOTE: options are split at every comma, so an option whose value itself
+    contains a comma is not supported.
+    """
+
+    options_line = find_token(document.header, "\\options", 0)
+    if options_line == -1:
+        return
+
+    # everything after the "\options" token is the comma separated list
+    parts = document.header[options_line].split(None, 1)
+    listed = parts[1] if len(parts) > 1 else ""
+    options = [opt.strip() for opt in listed.split(',')]
+    if option not in options:
+        return
+
+    remaining = [opt for opt in options if opt != option and opt != ""]
+
+    # if it was the single option, the options line should be
+    # removed since it would be empty
+    if not remaining:
+        del document.header[options_line]
+        return
+
+    document.header[options_line] = "\\options " + ",".join(remaining)
+
+
+def is_document_option(document, option):
+    """
+    Find if _option_ is a document option.
+
+    Returns True if _option_ is one of the comma separated entries of the
+    \\options line in the header, and False otherwise (also when there is
+    no \\options line). Entries are compared as a whole, so "a4" is not
+    found in "\\options a4paper".
+    """
+
+    # Find \options in the header
+    options_line = find_token(document.header, "\\options", 0)
+
+    # \options is not present in the header
+    if options_line == -1:
+        return False
+
+    # everything after the "\options" token is the comma separated list
+    parts = document.header[options_line].split(None, 1)
+    listed = parts[1] if len(parts) > 1 else ""
+    return option in [opt.strip() for opt in listed.split(',')]
+
+
+def _ert_inset(command):
+    "Return the lines of an open ERT inset holding a backslash followed by command."
+    return ["\\begin_inset ERT", "status open", "",
+            "\\begin_layout Plain Layout", "", "",
+            "\\backslash",
+            command,
+            "\\end_layout", "", "\\end_inset", "", ""]
+
+
+def _ert_paragraph(command):
+    "Return the lines of a Standard paragraph that contains only _ert_inset(command)."
+    return ["\\begin_layout Standard"] + _ert_inset(command) + ["\\end_layout", ""]
+
+
+def revert_language(document, lyxname, babelname, polyglossianame):
+    """
+    Revert native language support to ERT.
+
+    document        : the document; header, body, preamble and the
+                      `language` attribute are modified in place.
+    lyxname         : name of the language as used in \\language (header)
+                      and \\lang (body).
+    babelname       : name of the language for babel.
+    polyglossianame : name of the language for polyglossia.
+
+    If babelname or polyglossianame is empty, it is assumed this language
+    package is not supported for the given language.
+
+    Polyglossia is used if the header has \\use_non_tex_fonts set,
+    \\language_package is "default" or "auto", and polyglossianame is not
+    empty. Otherwise babel is used if babelname is not empty. If neither
+    applies, the \\lang lines are removed without a replacement.
+    """
+
+    # Are we using polyglossia?
+    use_polyglossia = False
+    if get_bool_value(document.header, "\\use_non_tex_fonts"):
+        i = find_token(document.header, "\\language_package")
+        if i == -1:
+            document.warning("Malformed document! Missing \\language_package")
+        else:
+            pack = get_value(document.header, "\\language_package", i)
+            if pack in ("default", "auto"):
+                use_polyglossia = True
+
+    # Do we use this language with polyglossia?
+    with_polyglossia = use_polyglossia and polyglossianame != ""
+    # Do we use this language with babel?
+    with_babel = not with_polyglossia and babelname != ""
+
+    # Are we dealing with a primary or secondary language?
+    primary = False
+    secondary = False
+
+    orig_doc_language = document.language
+    # Main language first
+    if document.language == lyxname:
+        primary = True
+        document.language = "english"
+        i = find_token(document.header, "\\language %s" % lyxname, 0)
+        if i != -1:
+            document.header[i] = "\\language english"
+        # NOTE(review): this writes back the value the line was found by,
+        # so it looks like a no-op -- confirm the intended package.
+        j = find_token(document.header, "\\language_package default", 0)
+        if j != -1:
+            document.header[j] = "\\language_package default"
+        if with_polyglossia:
+            add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{%s}}" % polyglossianame])
+            document.body[2 : 2] = _ert_paragraph("resetdefaultlanguage{%s}" % polyglossianame)
+
+    # Now secondary languages
+    i = 0
+    while True:
+        i = find_token(document.body, '\\lang', i)
+        if i == -1:
+            break
+        if document.body[i].startswith('\\lang %s' % lyxname):
+            secondary = True
+            reverted_language = True
+            polyglossia_env = polyglossianame
+            babel_language = babelname
+        elif primary and document.body[i].startswith('\\lang english'):
+            # Since we switched the main language manually, English parts need to be marked
+            reverted_language = False
+            polyglossia_env = "english"
+            babel_language = "english"
+        else:
+            i += 1
+            continue
+
+        # The closing ERT goes before the next \lang line found ahead of
+        # the position reported by get_containing_layout(). If there is
+        # none, it goes at that position, in a paragraph of its own.
+        endlang = get_containing_layout(document.body, i)[2]
+        langswitch = find_token(document.body, '\\lang', i + 1, endlang)
+        if langswitch != -1:
+            endlang = langswitch
+            startlayout = ""
+            endlayout = ""
+        else:
+            startlayout = "\\begin_layout Standard"
+            endlayout = "\\end_layout"
+
+        if with_polyglossia:
+            if reverted_language:
+                add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{%s}}" % polyglossianame])
+            begin_cmd = "begin{%s}" % polyglossia_env
+            end_cmd = "end{%s}" % polyglossia_env
+        elif with_babel:
+            begin_cmd = "begin{otherlanguage}{%s}" % babel_language
+            end_cmd = "end{otherlanguage}"
+        else:
+            # no language package for this language: just drop the \lang line
+            begin_cmd = None
+            end_cmd = None
+
+        # Insert the end first: it lies behind i, so i stays valid.
+        if end_cmd is not None:
+            document.body[endlang : endlang] = [startlayout] + _ert_inset(end_cmd) + [endlayout, ""]
+        del document.body[i]
+        if begin_cmd is not None:
+            document.body[i : i] = _ert_inset(begin_cmd) + [""]
+        # i is not advanced: the \lang line is gone and the search resumes
+        # at the lines that took its place.
+
+    # With babel, we need to add the language options
+    if with_babel and (primary or secondary):
+        insert_document_option(document, babelname)
+        reset_cmd = "selectlanguage{%s}" % orig_doc_language
+        # A paragraph inserted at body[2:2] has its command at index 10;
+        # short bodies cannot contain it (and must not raise IndexError).
+        already_reset = len(document.body) > 10 and document.body[10] == reset_cmd
+        if secondary and not already_reset:
+            # Since the user options are always placed after the babel options,
+            # we need to reset the main language
+            document.body[2 : 2] = _ert_paragraph(reset_cmd)
+