RELEASE-NOTES: document encoding defaults change

[lyx.git] / lib / lyx2lyx / lyx_2_3.py
diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py

index 7f4ceef6bd632cd19b958d886fa2d20920a5df17..8b834c1abc4130ebec9dc8b45d60f461fa9dd209 100644 (file)
--- a/lib/lyx2lyx/lyx_2_3.py
+++ b/lib/lyx2lyx/lyx_2_3.py
@@ -32,9 +32,7 @@ from parser_tools import (del_token, del_value, del_complete_lines,
  #  find_tokens, find_token_exact, check_token, get_option_value
  
  from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, revert_font_attrs,
-                           insert_to_preamble, latex_length)
-#  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
-#  revert_flex_inset, hex2ratio, str2bool
+                           insert_to_preamble, latex_length, revert_language)
  
  ####################################################################
  # Private helper functions
@@ -132,7 +130,6 @@ def revert_ibranches(document):
              continue
          if inverted:
              branch = document.body[i][20:].strip()
-            #document.warning(branch)
              if not branch in antibranches:
                  antibranch = "Anti-" + branch
                  while antibranch in antibranches:
@@ -140,7 +137,6 @@ def revert_ibranches(document):
                  antibranches[branch] = antibranch
              else:
                  antibranch = antibranches[branch]
-            #document.warning(antibranch)
              document.body[i] = "\\begin_inset Branch " + antibranch
  
      # now we need to add the new branches to the header
@@ -252,30 +248,17 @@ def revert_new_babel_languages(document):
  
      Set the document language to English but use correct babel setting.
      """
-    # TODO: currently, text parts in these languages are kept as-is
-    # and are converted to the document language by LyX 2.2 with warnings like
-    # LyX: Unknown language `romansh' [around line 273 of file lyx_2_3_test.22.lyx current token: 'romansh' context: 'InsetSpaceParams::read']
  
-    if document.language not in ["bosnian", "friulan", "macedonian",
-                                 "piedmontese", "romansh"]:
-        return
-    i = find_token(document.header, "\\language")
-    if i != -1:
-        document.header[i] = "\\language english"
-    # ensure we use Babel:
-    # TODO: Polyglossia supports friulan, piedmontese, romansh
-    # but requires "\resetdefaultlanguage{...}" at begin of document.
-    j = find_token(document.header, "\\language_package default")
-    if j != -1:
-        document.header[j] = "\\language_package babel"
-    k = find_token(document.header, "\\options")
-    if k != -1:
-        document.header[k] = document.header[k].replace("\\options",
-                                    "\\options %s," % document.language)
-    else:
-        l = find_token(document.header, "\\use_default_options")
-        document.header.insert(l + 1, "\\options " + document.language)
-    document.language = "english"
+    nblanguages = ["bosnian", "friulan", "macedonian", "piedmontese", "romansh"]
+
+    for lang in nblanguages:
+        if lang == "bosnian" or lang == "macedonian":
+            # These are only supported by babel
+            revert_language(document, lang, lang, "")
+        else:
+            # These are supported by babel and polyglossia
+            revert_language(document, lang, lang, lang)
+
  
  # TODO:
  # def convert_new_babel_languages(document)
@@ -286,127 +269,37 @@ def revert_new_babel_languages(document):
  def revert_amharic(document):
      "Set the document language to English but assure Amharic output"
  
-    if document.language == "amharic":
-        document.language = "english"
-        i = find_token(document.header, "\\language amharic", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{amharic}}"])
-        document.body[2 : 2] = ["\\begin_layout Standard",
-                                "\\begin_inset ERT", "status open", "",
-                                "\\begin_layout Plain Layout", "", "",
-                                "\\backslash",
-                                "resetdefaultlanguage{amharic}",
-                                "\\end_layout", "", "\\end_inset", "", "",
-                                "\\end_layout", ""]
+    revert_language(document, "amharic", "", "amharic")
  
  
  def revert_asturian(document):
      "Set the document language to English but assure Asturian output"
  
-    if document.language == "asturian":
-        document.language = "english"
-        i = find_token(document.header, "\\language asturian", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{asturian}}"])
-        document.body[2 : 2] = ["\\begin_layout Standard",
-                                "\\begin_inset ERT", "status open", "",
-                                "\\begin_layout Plain Layout", "", "",
-                                "\\backslash",
-                                "resetdefaultlanguage{asturian}",
-                                "\\end_layout", "", "\\end_inset", "", "",
-                                "\\end_layout", ""]
+    revert_language(document, "asturian", "", "asturian")
  
  
  def revert_kannada(document):
      "Set the document language to English but assure Kannada output"
  
-    if document.language == "kannada":
-        document.language = "english"
-        i = find_token(document.header, "\\language kannada", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{kannada}}"])
-        document.body[2 : 2] = ["\\begin_layout Standard",
-                                "\\begin_inset ERT", "status open", "",
-                                "\\begin_layout Plain Layout", "", "",
-                                "\\backslash",
-                                "resetdefaultlanguage{kannada}",
-                                "\\end_layout", "", "\\end_inset", "", "",
-                                "\\end_layout", ""]
+    revert_language(document, "kannada", "", "kannada")
  
  
  def revert_khmer(document):
      "Set the document language to English but assure Khmer output"
  
-    if document.language == "khmer":
-        document.language = "english"
-        i = find_token(document.header, "\\language khmer", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{khmer}}"])
-        document.body[2 : 2] = ["\\begin_layout Standard",
-                                "\\begin_inset ERT", "status open", "",
-                                "\\begin_layout Plain Layout", "", "",
-                                "\\backslash",
-                                "resetdefaultlanguage{khmer}",
-                                "\\end_layout", "", "\\end_inset", "", "",
-                                "\\end_layout", ""]
+    revert_language(document, "khmer", "", "khmer")
  
  
  def revert_urdu(document):
      "Set the document language to English but assure Urdu output"
  
-    if document.language == "urdu":
-        document.language = "english"
-        i = find_token(document.header, "\\language urdu", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{urdu}}"])
-        document.body[2 : 2] = ["\\begin_layout Standard",
-                                "\\begin_inset ERT", "status open", "",
-                                "\\begin_layout Plain Layout", "", "",
-                                "\\backslash",
-                                "resetdefaultlanguage{urdu}",
-                                "\\end_layout", "", "\\end_inset", "", "",
-                                "\\end_layout", ""]
+    revert_language(document, "urdu", "", "urdu")
  
  
  def revert_syriac(document):
      "Set the document language to English but assure Syriac output"
  
-    if document.language == "syriac":
-        document.language = "english"
-        i = find_token(document.header, "\\language syriac", 0)
-        if i != -1:
-            document.header[i] = "\\language english"
-        j = find_token(document.header, "\\language_package default", 0)
-        if j != -1:
-            document.header[j] = "\\language_package default"
-        add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{syriac}}"])
-        document.body[2 : 2] = ["\\begin_layout Standard",
-                                "\\begin_inset ERT", "status open", "",
-                                "\\begin_layout Plain Layout", "", "",
-                                "\\backslash",
-                                "resetdefaultlanguage{syriac}",
-                                "\\end_layout", "", "\\end_inset", "", "",
-                                "\\end_layout", ""]
+    revert_language(document, "syriac", "", "syriac")
  
  
  def revert_quotes(document):
@@ -420,6 +313,7 @@ def revert_quotes(document):
          if len(words) > 1 and words[0] == "\\begin_inset" and \
             ( words[1] in ["ERT", "listings"] or ( len(words) > 2 and words[2] in ["URL", "Chunk", "Sweave", "S/R"]) ):
              j = find_end_of_inset(document.body, i)
+
              if j == -1:
                  document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
                  i += 1
@@ -434,10 +328,10 @@ def revert_quotes(document):
                      document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
                      i = k
                      continue
-                replace = "\""
+                replace = '"'
                  if document.body[k].endswith("s"):
                      replace = "'"
-                document.body[k:l+1] = [replace]
+                document.body[k:l+2] = [replace]
          else:
              i += 1
              continue
@@ -467,7 +361,7 @@ def revert_quotes(document):
                  replace = "\""
                  if document.body[k].endswith("s"):
                      replace = "'"
-                document.body[k:l+1] = [replace]
+                document.body[k:l+2] = [replace]
          else:
              i += 1
              continue
@@ -498,7 +392,7 @@ def revert_quotes(document):
              replace = "\""
              if document.body[k].endswith("s"):
                  replace = "'"
-            document.body[k:l+1] = [replace]
+            document.body[k:l+2] = [replace]
          i = l
  
  
@@ -602,7 +496,7 @@ def revert_plainquote(document):
          replace = "\""
          if document.body[k].endswith("s"):
              replace = "'"
-        document.body[k:l+1] = [replace]
+        document.body[k:l+2] = [replace]
          i = l
  
  
@@ -1503,24 +1397,29 @@ command_insets = ["bibitem", "citation", "href", "index_print", "nomenclature"]
  def convert_literalparam(document):
      " Add param literal "
  
-    for inset in command_insets:
-        i = 0
-        while True:
-            i = find_token(document.body, '\\begin_inset CommandInset %s' % inset, i)
-            if i == -1:
-                break
-            j = find_end_of_inset(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: Can't find end of %s inset at line %d" % (inset, i))
-                i += 1
-                continue
-            while i < j and document.body[i].strip() != '':
-                i += 1
-            # href is already fully latexified. Here we can switch off literal.
-            if inset == "href":
-                document.body.insert(i, "literal \"false\"")
-            else:
-                document.body.insert(i, "literal \"true\"")
+    pos = len("\\begin_inset CommandInset ")
+    i = 0
+    while True:
+        i = find_token(document.body, '\\begin_inset CommandInset', i)
+        if i == -1:
+            break
+        inset = document.body[i][pos:].strip()
+        if not inset in command_insets:
+            i += 1
+            continue
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of %s inset at line %d" % (inset, i))
+            i += 1
+            continue
+        while i < j and document.body[i].strip() != '':
+            i += 1
+        # href is already fully latexified. Here we can switch off literal.
+        if inset == "href":
+            document.body.insert(i, "literal \"false\"")
+        else:
+            document.body.insert(i, "literal \"true\"")
+        i = j + 1
  
  
  def revert_literalparam(document):
@@ -1805,7 +1704,7 @@ def convert_dashligatures(document):
          # or "\threehyphens\n" as interim representation for -- and ---.)
          lines = document.body
          has_literal_dashes = has_ligature_dashes = False
-        dash_pattern = re.compile(u"[\u2013\u2014]|\\twohyphens|\\threehyphens")
+        dash_pattern = re.compile(u".*[\u2013\u2014]|\\twohyphens|\\threehyphens")
          i = j = 0
          while True:
              # skip lines without dashes:
@@ -1833,17 +1732,20 @@ def convert_dashligatures(document):
                  document.warning("Malformed LyX document: "
                                  "Can't find layout at line %d" % i)
                  continue
+            if not layoutname:
+                document.warning("Malformed LyX document: "
+                                 "Missing layout name on line %d"%start)
              if layoutname == "LyX-Code":
                  i = end
                  continue
  
-            # literal dash followed by a word or no-break space:
-            if re.search(u"[\u2013\u2014]([\w\u00A0]|$)",
+            # literal dash followed by a non-white-character or no-break space:
+            if re.search(u"[\u2013\u2014]([\S\u00A0\u202F\u2060]|$)",
                           line, flags=re.UNICODE):
                  has_literal_dashes = True
-            # ligature dash followed by word or no-break space on next line:
+            # ligature dash followed by non-white-char or no-break space on next line:
              if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
-                re.match(u"[\w\u00A0]", lines[i+1], flags=re.UNICODE)):
+                re.match(u"[\S\u00A0\u202F\u2060]", lines[i+1], flags=re.UNICODE)):
                  has_ligature_dashes = True
              if has_literal_dashes and has_ligature_dashes:
                  # TODO: insert a warning note in the document?
@@ -1866,40 +1768,46 @@ def convert_dashligatures(document):
  
  def revert_dashligatures(document):
      """Remove font ligature settings for en- and em-dashes.
-    Revert conversion of \twodashes or \threedashes to literal dashes."""
+    Revert conversion of \\twohyphens or \\threehyphens to literal dashes.
+    """
      use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
      if use_dash_ligatures != "true" or document.backend != "latex":
          return
-    j = 0
-    new_body = []
-    for i, line in enumerate(document.body):
-        # Skip some document parts where dashes are not converted
-        if (i < j) or line.startswith("\\labelwidthstring"):
-            new_body.append(line)
+    i = 0
+    dash_pattern = re.compile(u".*[\u2013\u2014]")
+    while True:
+        # skip lines without dashes:
+        i = find_re(document.body, dash_pattern, i+1)
+        if i == -1:
+            break
+        line = document.body[i]
+        # skip label width string (see bug 10243):
+        if line.startswith("\\labelwidthstring"):
              continue
-        if (line.startswith("\\begin_inset ") and
-            line[13:].split()[0] in ["CommandInset", "ERT", "External",
-                "Formula", "FormulaMacro", "Graphics", "IPA", "listings"]
-            or line == "\\begin_inset Flex Code"):
-            j = find_end_of_inset(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: Can't find end of "
-                                 + words[1] + " inset at line " + str(i))
-            new_body.append(line)
+        # do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
+        try:
+            inset_type, start, end = get_containing_inset(document.body, i)
+        except TypeError: # no containing inset
+            inset_type, start, end = "no inset", -1, -1
+        if (inset_type.split()[0] in
+            ["CommandInset", "ERT", "External", "Formula",
+                "FormulaMacro", "Graphics", "IPA", "listings"]
+            or inset_type == "Flex Code"):
+            i = end
              continue
-        if line == "\\begin_layout LyX-Code":
-            j = find_end_of_layout(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: "
-                    "Can't find end of %s layout at line %d" % (words[1],i))
-            new_body.append(line)
+        try:
+            layoutname, start, end, j = get_containing_layout(document.body, i)
+        except TypeError: # no (or malformed) containing layout
+            document.warning("Malformed LyX document: "
+                            "Can't find layout at body line %d" % i)
+            continue
+        if layoutname == "LyX-Code":
+            i = end
              continue
          # TODO: skip replacement in typewriter fonts
          line = line.replace(u'\u2013', '\\twohyphens\n')
          line = line.replace(u'\u2014', '\\threehyphens\n')
-        lines = line.split('\n')
-        new_body.extend(line.split('\n'))
-    document.body = new_body
+        document.body[i:i+1] = line.split('\n')
      # redefine the dash LICRs to use ligature dashes:
      add_to_preamble(document, [r'\renewcommand{\textendash}{--}',
                                 r'\renewcommand{\textemdash}{---}'])
@@ -2157,6 +2065,30 @@ def revert_minted(document):
      del_token(document.header, "\\use_minted")
  
  
+def revert_longtable_lscape(document):
+    " revert the longtable landscape mode to ERT "
+    i = 0
+    regexp = re.compile(r'^<features rotate=\"90\"\s.*islongtable=\"true\"\s.*$', re.IGNORECASE)
+    while True:
+        i = find_re(document.body, regexp, i)
+        if i == -1:
+            return
+
+        document.body[i] = document.body[i].replace(" rotate=\"90\"", "")
+        lay = get_containing_layout(document.body, i)
+        if lay == False:
+            document.warning("Longtable has not layout!")
+            i += 1
+            continue
+        begcmd = put_cmd_in_ert("\\begin{landscape}")
+        endcmd = put_cmd_in_ert("\\end{landscape}")
+        document.body[lay[2] : lay[2]] = endcmd + ["\\end_layout"]
+        document.body[lay[1] : lay[1]] = ["\\begin_layout " + lay[0], ""] + begcmd
+
+        add_to_preamble(document, ["\\usepackage{pdflscape}"])
+        i = lay[2]
+
+
  ##
  # Conversion hub
  #
@@ -2202,7 +2134,7 @@ convert = [
            ]
  
  revert =  [
-           [543, [revert_minted]],
+           [543, [revert_minted, revert_longtable_lscape]],
             [542, [revert_mathnumberingname]],
             [541, [revert_mathnumberpos]],
             [540, [revert_allowbreak]],