Fix conversion/reversion of parentheses order in Hebrew text.

[lyx.git] / lib / lyx2lyx / lyx_2_4.py
diff --git a/lib/lyx2lyx/lyx_2_4.py b/lib/lyx2lyx/lyx_2_4.py

index eb7aa4e52096e3a560cf38d431d59e057a3f1a94..fa61754f80a88d186023aaee6d55f467c22330a4 100644 (file)
--- a/lib/lyx2lyx/lyx_2_4.py
+++ b/lib/lyx2lyx/lyx_2_4.py
@@ -36,7 +36,7 @@ from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_la
  #    is_in_inset, set_bool_value
  #    find_tokens, find_token_exact, check_token
  
-from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
+from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, revert_language, revert_flex_inset)
  #  revert_font_attrs, insert_to_preamble, latex_length
  #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  #  revert_flex_inset, hex2ratio, str2bool
@@ -126,7 +126,7 @@ class fontmapping:
              return fontname
          return None
  
-def createFontMapping():
+def createFontMapping(fontlist):
      # Create info for known fonts for the use in
      #   convert_latexFonts() and
      #   revert_latexFonts()
@@ -136,21 +136,38 @@ def createFontMapping():
      # * For now, add DejaVu and IBMPlex only.
      # * Expand, if desired
      fm = fontmapping()
-    fm.expandFontMapping(['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None)
-    fm.expandFontMapping(['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled")
-    fm.expandFontMapping(['DejaVuSansMono'], "typewriter", "tt", None, "scaled")
-    fm.expandFontMapping(['IBMPlexSerif', 'IBMPlexSerifThin,thin',
-                          'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
-                          'IBMPlexSerifSemibold,semibold'],
-                         "roman", None, "plex-serif")
-    fm.expandFontMapping(['IBMPlexSans','IBMPlexSansCondensed,condensed',
-                          'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
-                          'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
-                         "sans", "sf", "plex-sans", "scale")
-    fm.expandFontMapping(['IBMPlexMono', 'IBMPlexMonoThin,thin',
-                          'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
-                          'IBMPlexMonoSemibold,semibold'],
-                         "typewriter", "tt", "plex-mono", "scale")
+    for font in fontlist:
+        if font == 'DejaVu':
+            fm.expandFontMapping(['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None)
+            fm.expandFontMapping(['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled")
+            fm.expandFontMapping(['DejaVuSansMono'], "typewriter", "tt", None, "scaled")
+        elif font == 'IBM':
+            fm.expandFontMapping(['IBMPlexSerif', 'IBMPlexSerifThin,thin',
+                                  'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
+                                  'IBMPlexSerifSemibold,semibold'],
+                                 "roman", None, "plex-serif")
+            fm.expandFontMapping(['IBMPlexSans','IBMPlexSansCondensed,condensed',
+                                  'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
+                                  'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
+                                 "sans", "sf", "plex-sans", "scale")
+            fm.expandFontMapping(['IBMPlexMono', 'IBMPlexMonoThin,thin',
+                                  'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
+                                  'IBMPlexMonoSemibold,semibold'],
+                                 "typewriter", "tt", "plex-mono", "scale")
+        elif font == 'Adobe':
+            fm.expandFontMapping(['ADOBESourceSerifPro'], "roman", None, "sourceserifpro")
+            fm.expandFontMapping(['ADOBESourceSansPro'], "sans", "sf", "sourcesanspro", "scaled")
+            fm.expandFontMapping(['ADOBESourceCodePro'], "typewriter", "tt", "sourcecodepro", "scaled")
+        elif font == 'Noto':
+            fm.expandFontMapping(['NotoSerifRegular,regular', 'NotoSerifMedium,medium',
+                                  'NotoSerifThin,thin', 'NotoSerifLight,light',
+                                  'NotoSerifExtralight,extralight'],
+                                  "roman", None, "noto-serif")
+            fm.expandFontMapping(['NotoSansRegular,regular', 'NotoSansMedium,medium',
+                                  'NotoSansThin,thin', 'NotoSansLight,light',
+                                  'NotoSansExtralight,extralight'],
+                                  "sans", "sf", "noto-sans", "scaled")
+            fm.expandFontMapping(['NotoMonoRegular'], "typewriter", "tt", "noto-mono", "scaled")
      return fm
  
  def convert_fonts(document, fm):
@@ -212,8 +229,9 @@ def convert_fonts(document, fm):
          j = find_token(document.header, ft, 0)
          if j != -1:
              val = get_value(document.header, ft, j)
-            vals = val.split()
-            document.header[j] = ft + ' "' + fn + '" ' + vals[1]
+            words = val.split() # ! splits also values like '"DejaVu Sans"'
+            words[0] = '"' + fn + '"'
+            document.header[j] = ft + ' ' + ' '.join(words)
  
  def revert_fonts(document, fm, fontmap):
      " Revert native font definition to LaTeX "
@@ -235,8 +253,8 @@ def revert_fonts(document, fm, fontmap):
              continue
          ft = mo.group(1)    # 'roman', 'sans', 'typewriter', 'math'
          val = get_value(document.header, ft, i)
-        words = val.split()
-        font = words[0].replace('"', '')
+        words = val.split(' ')     # ! splits also values like '"DejaVu Sans"'
+        font = words[0].strip('"') # TeX font name has no whitespace
          if not font in fm.font2pkgmap:
              i += 1
              continue
@@ -244,7 +262,8 @@ def revert_fonts(document, fm, fontmap):
          val = fontinfo.package
          if not val in fontmap:
              fontmap[val] = []
-        document.header[i] = ft + ' "default" ' + words[1]
+        words[0] = '"default"'
+        document.header[i] = ft + ' ' + ' '.join(words)
          if fontinfo.scaleopt != None:
              xval =  get_value(document.header, "\\font_" + fontinfo.scaletype + "_scale", 0)
              mo = rscales.search(xval)
@@ -264,11 +283,43 @@ def revert_fonts(document, fm, fontmap):
  ###
  ###############################################################################
  
+def convert_inputencoding_namechange(document):
+    " Rename inputencoding settings (auto -> auto-legacy, default -> auto-legacy-plain). "
+    pos = find_token(document.header, "\\inputencoding", 0)
+    if pos != -1:
+        # Both substitutions run on the complete header line, one after
+        # the other.
+        renamed = document.header[pos].replace("auto", "auto-legacy")
+        renamed = renamed.replace("default", "auto-legacy-plain")
+        document.header[pos] = renamed
+
+def revert_inputencoding_namechange(document):
+    " Restore old inputencoding names (auto-legacy-plain -> default, auto-legacy -> auto). "
+    pos = find_token(document.header, "\\inputencoding", 0)
+    if pos != -1:
+        # The longer name has to be handled first: "auto-legacy" is a
+        # prefix of "auto-legacy-plain".
+        for new_name, old_name in (("auto-legacy-plain", "default"),
+                                   ("auto-legacy", "auto")):
+            document.header[pos] = document.header[pos].replace(new_name, old_name)
+
+def convert_notoFonts(document):
+    " Handle Noto fonts definition to LaTeX "
+
+    # Nothing to do unless the header says TeX fonts are in use.
+    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
+        return
+    convert_fonts(document, createFontMapping(['Noto']))
+
+def revert_notoFonts(document):
+    " Revert native Noto font definition to LaTeX "
+
+    # Nothing to do unless the header says TeX fonts are in use.
+    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
+        return
+    # revert_fonts() fills this dict, add_preamble_fonts() consumes it.
+    packages = {}
+    noto_mapping = createFontMapping(['Noto'])
+    revert_fonts(document, noto_mapping, packages)
+    add_preamble_fonts(document, packages)
+
  def convert_latexFonts(document):
      " Handle DejaVu and IBMPlex fonts definition to LaTeX "
  
      if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
-        fm = createFontMapping()
+        fm = createFontMapping(['DejaVu', 'IBM'])
          convert_fonts(document, fm)
  
  def revert_latexFonts(document):
@@ -276,7 +327,23 @@ def revert_latexFonts(document):
  
      if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
          fontmap = dict()
-        fm = createFontMapping()
+        fm = createFontMapping(['DejaVu', 'IBM'])
+        revert_fonts(document, fm, fontmap)
+        add_preamble_fonts(document, fontmap)
+
+def convert_AdobeFonts(document):
+    " Handle Adobe Source (Serif/Sans/Code Pro) fonts definition to LaTeX "
+
+    # Only acts when the header declares that TeX fonts are in use.
+    if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
+        fm = createFontMapping(['Adobe'])
+        convert_fonts(document, fm)
+
+def revert_AdobeFonts(document):
+    " Revert native Adobe Source font definition to LaTeX "
+
+    # Only acts when the header declares that TeX fonts are in use.
+    if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
+        fontmap = dict()
+        fm = createFontMapping(['Adobe'])
+        # fontmap is filled by revert_fonts() and then handed to
+        # add_preamble_fonts().
          revert_fonts(document, fm, fontmap)
          add_preamble_fonts(document, fontmap)
  
@@ -529,7 +596,7 @@ def revert_floatpclass(document):
              k = find_token(document.body, 'placement document', i, i + 2)
              if k != -1:
                  del document.body[k]
-            i = j
+            i += 1
              continue
          del document.body[k]
  
@@ -564,7 +631,7 @@ def revert_floatalignment(document):
          l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
          if l == -1:
              document.warning("Can't find float layout!")
-            i = j
+            i += 1
              continue
          alcmd = []
          if alignment == "left":
@@ -575,7 +642,7 @@ def revert_floatalignment(document):
              alcmd = put_cmd_in_ert("\\raggedleft{}")
          if len(alcmd) > 0:
              document.body[l+1:l+1] = alcmd
-        i = j 
+        i += 1
  
  
  def revert_tuftecite(document):
@@ -705,7 +772,7 @@ def revert_vcolumns(document):
                              if vval != "":
                                  needarray = True
                              vval += "V{\\linewidth}"
-                
+
                              document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
                              # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
                              # with newlines, and we do not want that)
@@ -751,7 +818,7 @@ def revert_bibencoding(document):
      if engine in ["biblatex", "biblatex-natbib"]:
          biblatex = True
  
-    # Map lyx to latex encoding names 
+    # Map lyx to latex encoding names
      encodings = {
          "utf8" : "utf8",
          "utf8x" : "utf8x",
@@ -810,6 +877,9 @@ def revert_bibencoding(document):
          k = find_token(document.body, "encoding", i, j)
          if k != -1:
              del document.body[k]
+        if encoding == "default":
+            i += 1
+            continue
          # Re-find inset end line
          j = find_end_of_inset(document.body, i)
          if biblatex:
@@ -972,6 +1042,7 @@ def revert_dateinfo(document):
          "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
          "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
          "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
+        "malayalam" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
          "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
          "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
          "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
@@ -1071,6 +1142,10 @@ def revert_dateinfo(document):
              fmt = re.sub('[^\'%]d', '%d', fmt)
              fmt = fmt.replace("'", "")
              result = dte.strftime(fmt)
+        if sys.version_info < (3,0):
+            # In Python 2, datetime module works with binary strings,
+            # our dateformat strings are utf8-encoded:
+            result = result.decode('utf-8')
          document.body[i : j+1] = result
          i = i + 1
  
@@ -1150,6 +1225,7 @@ def revert_timeinfo(document):
          "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
          "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
          "magyar" : ["%H:%M:%S %Z", "%H:%M"],
+        "malayalam" : ["%p %I:%M:%S %Z", "%p %I:%M"],
          "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
          "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
          "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
@@ -1299,7 +1375,7 @@ def revert_l7ninfo(document):
              continue
          arg = find_token(document.body, 'arg', i, j)
          argv = get_quoted_value(document.body, "arg", arg)
-        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & " 
+        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
          argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
          document.body[i : j+1] = argv
          i = i + 1
@@ -1330,6 +1406,227 @@ def revert_listpargs(document):
          i += 1
  
  
+def revert_lformatinfo(document):
+    " Revert layout format Info inset to text. "
+
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Info", pos)
+        if pos == -1:
+            return
+        end = find_end_of_inset(document.body, pos + 1)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of Info inset.")
+            pos += 1
+            continue
+        # Only insets with type "lyxinfo" and arg "layoutformat" are replaced.
+        type_line = find_token(document.body, 'type', pos, end)
+        if get_quoted_value(document.body, "type", type_line) == "lyxinfo":
+            arg_line = find_token(document.body, 'arg', pos, end)
+            if get_quoted_value(document.body, "arg", arg_line) == "layoutformat":
+                # hardcoded for now
+                # NOTE(review): assigning a str to a list slice inserts one
+                # item per character ("6", "9") -- confirm this is intended.
+                document.body[pos : end+1] = "69"
+        pos += 1
+
+
+def convert_hebrew_parentheses(document):
+    """ Swap opening/closing parentheses in Hebrew text.
+
+    Up to LyX 2.4, ")" was used as opening parenthesis and
+    "(" as closing parenthesis for Hebrew in the LyX source.
+
+    The language in effect is tracked with a stack: "\\begin_layout"
+    pushes a copy of the current language, "\\lang" overrides the top
+    entry and "\\end_layout" pops it.  Only lines that do not start
+    with a backslash are modified, and only while the current language
+    is "hebrew".  document.body is changed in place.
+    """
+
+    lang_prefix = '\\lang '
+    current_languages = [document.language]
+    for i, line in enumerate(document.body):
+        if line.startswith(lang_prefix):
+            # Cut the prefix off by length: str.lstrip() takes a *set* of
+            # characters and would also eat leading letters of the
+            # language name (e.g. "ngerman" -> "erman").
+            current_languages[-1] = line[len(lang_prefix):]
+        elif line.startswith('\\begin_layout'):
+            current_languages.append(current_languages[-1])
+        elif line.startswith('\\end_layout'):
+            # Keep the document language at the bottom of the stack even
+            # if the body contains an unbalanced \end_layout.
+            if len(current_languages) > 1:
+                current_languages.pop()
+        elif current_languages[-1] == 'hebrew' and not line.startswith('\\'):
+            # three-step swap with NUL as temporary placeholder
+            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')
+
+
+def revert_hebrew_parentheses(document):
+    " Store parentheses in Hebrew text reversed"
+    # Swapping "(" and ")" a second time restores the previous state, so
+    # reverting reuses the conversion routine.
+    # This only exists to keep the convert/revert naming convention
+    convert_hebrew_parentheses(document)
+
+
+def revert_malayalam(document):
+    " Set the document language to English but assure Malayalam output "
+
+    # All work is delegated to revert_language() from lyx2lyx_tools.
+    # NOTE(review): the meaning of the three string arguments is defined
+    # by that helper, not here -- confirm against its signature.
+    revert_language(document, "malayalam", "", "malayalam")
+
+
+def revert_soul(document):
+    " Revert soul module flex insets to ERT "
+
+    # (flex inset name, LaTeX command) pairs
+    soul_commands = [("Spaceletters", "\\so"),
+                     ("Strikethrough", "\\st"),
+                     ("Underline", "\\ul"),
+                     ("Highlight", "\\hl"),
+                     ("Capitalize", "\\caps")]
+
+    # The soul package is loaded once if at least one of the insets occurs.
+    if any(find_token(document.body, "\\begin_inset Flex %s" % name, 0) != -1
+           for name, _ in soul_commands):
+        add_to_preamble(document, ["\\usepackage{soul}"])
+    # Highlight additionally gets the color package.
+    if find_token(document.body, "\\begin_inset Flex Highlight", 0) != -1:
+        add_to_preamble(document, ["\\usepackage{color}"])
+
+    for name, command in soul_commands:
+        revert_flex_inset(document.body, name, command)
+
+
+def revert_tablestyle(document):
+    " Remove tablestyle params "
+
+    pos = find_token(document.header, "\\tablestyle", 0)
+    if pos == -1:
+        return
+    # drop the first \tablestyle header line
+    del document.header[pos]
+
+
+def revert_bibfileencodings(document):
+    """ Revert individual Biblatex bibliography encodings
+
+    For every bibtex inset that carries a "file_encodings" parameter the
+    parameter line is removed.  With a biblatex cite engine, each bib file
+    is additionally registered via \\addbibresource in the preamble (with
+    a bibencoding option where a per-file encoding was given) and the
+    inset is replaced by an ERT \\printbibliography followed by a Note
+    that wraps the original inset.
+    """
+
+    # Get cite engine
+    engine = "basic"
+    i = find_token(document.header, "\\cite_engine", 0)
+    if i == -1:
+        document.warning("Malformed document! Missing \\cite_engine")
+    else:
+        engine = get_value(document.header, "\\cite_engine", i)
+
+    # Check if biblatex
+    biblatex = engine in ["biblatex", "biblatex-natbib"]
+
+    # Map lyx to latex encoding names
+    encodings = {
+        "utf8" : "utf8",
+        "utf8x" : "utf8x",
+        "armscii8" : "armscii8",
+        "iso8859-1" : "latin1",
+        "iso8859-2" : "latin2",
+        "iso8859-3" : "latin3",
+        "iso8859-4" : "latin4",
+        "iso8859-5" : "iso88595",
+        "iso8859-6" : "8859-6",
+        "iso8859-7" : "iso-8859-7",
+        "iso8859-8" : "8859-8",
+        "iso8859-9" : "latin5",
+        "iso8859-13" : "latin7",
+        "iso8859-15" : "latin9",
+        "iso8859-16" : "latin10",
+        "applemac" : "applemac",
+        "cp437" : "cp437",
+        "cp437de" : "cp437de",
+        "cp850" : "cp850",
+        "cp852" : "cp852",
+        "cp855" : "cp855",
+        "cp858" : "cp858",
+        "cp862" : "cp862",
+        "cp865" : "cp865",
+        "cp866" : "cp866",
+        "cp1250" : "cp1250",
+        "cp1251" : "cp1251",
+        "cp1252" : "cp1252",
+        "cp1255" : "cp1255",
+        "cp1256" : "cp1256",
+        "cp1257" : "cp1257",
+        "koi8-r" : "koi8-r",
+        "koi8-u" : "koi8-u",
+        "pt154" : "pt154",
+        "utf8-platex" : "utf8",
+        "ascii" : "ascii"
+    }
+
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
+        if i == -1:
+            break
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
+            i += 1
+            continue
+        # Kept under its own name: storing this in "encodings" would
+        # clobber the name table above.
+        file_encodings = get_quoted_value(document.body, "file_encodings", i, j)
+        if not file_encodings:
+            i += 1
+            continue
+        # "".split(",") yields [""], so empty names are dropped before
+        # testing whether any bib file is present.
+        bibfiles = [bib for bib in
+                    get_quoted_value(document.body, "bibfiles", i, j).split(",")
+                    if bib]
+        opts = get_quoted_value(document.body, "biblatexopts", i, j)
+        if not bibfiles:
+            document.warning("Bibtex inset at line %d does not have a bibfile!" %(i))
+        # remove encoding line
+        k = find_token(document.body, "file_encodings", i, j)
+        if k != -1:
+            del document.body[k]
+        # Re-find inset end line
+        j = find_end_of_inset(document.body, i)
+        if biblatex:
+            # file_encodings is split on tabs into "<bibfile> <encoding>"
+            # entries.
+            encmap = dict()
+            for entry in file_encodings.split("\t"):
+                fields = entry.split(" ", 1)
+                if len(fields) != 2:
+                    document.warning("Ignoring malformed file encoding entry '%s'" % entry)
+                    continue
+                # Translate the LyX encoding name to its LaTeX name;
+                # names missing from the table are passed through as is.
+                encmap[fields[0]] = encodings.get(fields[1], fields[1])
+            for bib in bibfiles:
+                pr = "\\addbibresource"
+                if bib in encmap:
+                    pr += "[bibencoding=" + encmap[bib] + "]"
+                pr += "{" + bib + "}"
+                add_to_preamble(document, [pr])
+            # Insert ERT \\printbibliography and wrap bibtex inset to a Note
+            pcmd = "printbibliography"
+            if opts:
+                pcmd += "[" + opts + "]"
+            repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",\
+                    "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",\
+                    "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",\
+                    "status open", "", "\\begin_layout Plain Layout" ]
+            repl += document.body[i:j+1]
+            repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
+            document.body[i:j+1] = repl
+            # continue the search behind the lines inserted above
+            j += 27
+
+        i = j + 1
+
+
+def revert_cmidruletrimming(document):
+    " Remove \\cmidrule trimming "
+
+    # FIXME: Revert to TeX code?
+    trim_attr = re.compile(r' (bottom|top)line[lr]trim="true"')
+    pos = find_token(document.body, '<cell ', 0)
+    while pos != -1:
+        # first, let's find out if we need to do anything
+        if 'trim="' in document.body[pos]:
+            # remove trim option
+            document.body[pos] = trim_attr.sub('', document.body[pos])
+        pos = find_token(document.body, '<cell ', pos + 1)
+
+
  ##
  # Conversion hub
  #
@@ -1354,10 +1651,29 @@ convert = [
             [560, []],
             [561, [convert_latexFonts]], # Handle dejavu, ibmplex fonts in GUI
             [562, []],
-           [563, []]
+           [563, []],
+           [564, []],
+           [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
+           [566, [convert_hebrew_parentheses]],
+           [567, []],
+           [568, []],
+           [569, []],
+           [570, []],
+           [571, []],
+           [572, [convert_notoFonts]],  # Added options thin, light, extralight for Noto
+           [573, [convert_inputencoding_namechange]],
            ]
  
-revert =  [
+revert =  [[572, [revert_inputencoding_namechange]],
+           [571, [revert_notoFonts]],
+           [570, [revert_cmidruletrimming]],
+           [569, [revert_bibfileencodings]],
+           [568, [revert_tablestyle]],
+           [567, [revert_soul]],
+           [566, [revert_malayalam]],
+           [565, [revert_hebrew_parentheses]],
+           [564, [revert_AdobeFonts]],
+           [563, [revert_lformatinfo]],
             [562, [revert_listpargs]],
             [561, [revert_l7ninfo]],
             [560, [revert_latexFonts]], # Handle dejavu, ibmplex fonts in user preamble