unicodesymbols: add some missing punctuation characters again

[lyx.git] / lib / lyx2lyx / lyx_1_5.py
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index f647603335629f5b3aaeaa8d54dff311731ada70..898b0b90e634fb45d88278c2f58dc0cfb928dfa7 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -230,6 +230,8 @@ where at least two languages have different default encodings are encoded
  in multiple encodings for file formats < 249. These files are incorrectly
  read and written (as if the whole file was in the encoding of the main
  language).
+This is not true for files written by CJK-LyX; they are always in the locale
+encoding.
  
  This function
  - converts from fake unicode values to true unicode if forward is true, and
@@ -239,6 +241,8 @@ document.encoding must be set to the old value (format 248) in both cases.
  We do this here and not in LyX.py because it is far easier to do the
  necessary parsing in modern formats than in ancient ones.
  """
+    if document.cjk_encoding != '':
+        return
      encoding_stack = [document.encoding]
      lang_re = re.compile(r"^\\lang\s(\S+)")
      if document.inputencoding == "auto" or document.inputencoding == "default":
@@ -247,18 +251,22 @@ necessary parsing in modern formats than in ancient ones.
              if result:
                  language = result.group(1)
                  if language == "default":
-                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding))
+                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                      encoding_stack[-1] = document.encoding
                  else:
                      from lyx2lyx_lang import lang
-                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]))
+                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                      encoding_stack[-1] = lang[language][3]
              elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
-                document.warning("Adding nested encoding %s." % encoding_stack[-1])
+                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                  encoding_stack.append(encoding_stack[-1])
              elif find_token(document.body, "\\end_layout", i, i + 1) == i:
-                document.warning("Removing nested encoding %s." % encoding_stack[-1])
-                del encoding_stack[-1]
+                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
+                if len(encoding_stack) == 1:
+                    # Don't remove the document encoding from the stack
+                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
+                else:
+                    del encoding_stack[-1]
              if encoding_stack[-1] != document.encoding:
                  if forward:
                      # This line has been incorrectly interpreted as if it was
@@ -292,7 +300,7 @@ def revert_utf8(document):
      elif get_value(document.header, "\\inputencoding", i) == "utf8":
          document.header[i] = "\\inputencoding auto"
      document.inputencoding = get_value(document.header, "\\inputencoding", 0)
-    document.encoding = get_encoding(document.language, document.inputencoding, 248)
+    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
      convert_multiencoding(document, False)
  
  
@@ -332,19 +340,24 @@ key "argument"
  
  This must be called after convert_commandparams.
  """
-    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
      i = 0
      while 1:
          i = find_token(document.body, "\\bibitem", i)
          if i == -1:
              break
-        match = re.match(regex, document.body[i])
-        option = match.group(1)
-        argument = match.group(2)
+        j = document.body[i].find('[') + 1
+        k = document.body[i].rfind(']')
+        if j == 0: # No optional argument found
+            option = None
+        else:
+            option = document.body[i][j:k]
+        j = document.body[i].rfind('{') + 1
+        k = document.body[i].rfind('}')
+        argument = document.body[i][j:k]
          lines = ['\\begin_inset LatexCommand bibitem']
          if option != None:
-            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
-        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
+            lines.append('label "%s"' % option.replace('"', '\\"'))
+        lines.append('key "%s"' % argument.replace('"', '\\"'))
          lines.append('')
          lines.append('\\end_inset')
          document.body[i:i+1] = lines
@@ -724,7 +737,7 @@ def convert_lyxline(document):
          k = 0
          while i < len(document.body):
              i = find_token(document.body, "\\size " + fontsizes[n], i)
-            k = find_token(document.body, "\\lyxline",i)
+            k = find_token(document.body, "\\lyxline", i)
              # the corresponding fontsize command is always 2 lines before the \lyxline
              if (i != -1 and k == i+2):
                  document.body[i:i+1] = []
@@ -1016,11 +1029,11 @@ def revert_accent(document):
      # Replace accented characters with InsetLaTeXAccent
      # Do not convert characters that can be represented in the chosen
      # encoding.
-    encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
+    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
      lang_re = re.compile(r"^\\lang\s(\S+)")
      for i in range(len(document.body)):
  
-        if document.inputencoding == "auto" or document.inputencoding == "default":
+        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
              # Track the encoding of the current line
              result = lang_re.match(document.body[i])
              if result:
@@ -1177,6 +1190,473 @@ def normalize_font_whitespace(document):
  
          i = i + 1
  
+
+def revert_utf8x(document):
+    " Set utf8x encoding to utf8. "
+    pos = find_token(document.header, "\\inputencoding", 0)
+    if pos != -1:
+        # Only the utf8x value is rewritten; any other encoding is kept.
+        if get_value(document.header, "\\inputencoding", pos) == "utf8x":
+            document.header[pos] = "\\inputencoding utf8"
+    else:
+        # No \inputencoding line at all: add one with the value "auto".
+        document.header.append("\\inputencoding auto")
+    # Keep the cached attribute in sync with the header.
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
+def revert_utf8plain(document):
+    " Set utf8plain encoding to utf8. "
+    pos = find_token(document.header, "\\inputencoding", 0)
+    if pos != -1:
+        # Only the utf8-plain value is rewritten; any other encoding is kept.
+        if get_value(document.header, "\\inputencoding", pos) == "utf8-plain":
+            document.header[pos] = "\\inputencoding utf8"
+    else:
+        # No \inputencoding line at all: add one with the value "auto".
+        document.header.append("\\inputencoding auto")
+    # Keep the cached attribute in sync with the header.
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
+def revert_beamer_alert(document):
+    " Revert beamer's \\alert inset back to ERT. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
+        if i == -1:
+            return
+        document.body[i] = "\\begin_inset ERT"
+        i = i + 1
+        # Look for the first \begin_layout after the inset start. The scan
+        # stops one line before the end of the body because the line after
+        # the \begin_layout is the one that gets rewritten.
+        while i + 1 < len(document.body):
+            if (document.body[i][:13] == "\\begin_layout"):
+                # Insert the \alert command
+                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
+                break
+            i = i + 1
+        else:
+            # Ran off the end of the body without finding a layout.
+            document.warning("Malformed LyX document: Missing `\\begin_layout' in Alert inset.")
+            return
+
+        i = i + 1
+
+
+def revert_beamer_structure(document):
+    " Revert beamer's \\structure inset back to ERT. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
+        if i == -1:
+            return
+        document.body[i] = "\\begin_inset ERT"
+        i = i + 1
+        # Look for the first \begin_layout after the inset start. The scan
+        # stops one line before the end of the body because the line after
+        # the \begin_layout is the one that gets rewritten.
+        while i + 1 < len(document.body):
+            if (document.body[i][:13] == "\\begin_layout"):
+                # Insert the \structure command
+                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
+                break
+            i = i + 1
+        else:
+            # Ran off the end of the body without finding a layout.
+            document.warning("Malformed LyX document: Missing `\\begin_layout' in Structure inset.")
+            return
+
+        i = i + 1
+
+
+def convert_changes(document):
+    " Switch output_changes off if tracking_changes is off. "
+    tracking_pos = find_token(document.header, '\\tracking_changes', 0)
+    if tracking_pos == -1:
+        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
+        return
+    output_pos = find_token(document.header, '\\output_changes', 0)
+    if output_pos == -1:
+        document.warning("Malformed lyx document: Missing '\\output_changes'.")
+        return
+    # Both header values are compared as the literal strings "true"/"false".
+    tracking_off = get_value(document.header, "\\tracking_changes", tracking_pos) == "false"
+    output_on = get_value(document.header, "\\output_changes", output_pos) == "true"
+    if tracking_off and output_on:
+        document.header[output_pos] = "\\output_changes false"
+
+
+def revert_ascii(document):
+    " Set ascii encoding to auto. "
+    pos = find_token(document.header, "\\inputencoding", 0)
+    if pos != -1:
+        # Only the ascii value is rewritten; any other encoding is kept.
+        if get_value(document.header, "\\inputencoding", pos) == "ascii":
+            document.header[pos] = "\\inputencoding auto"
+    else:
+        # No \inputencoding line at all: add one with the value "auto".
+        document.header.append("\\inputencoding auto")
+    # Keep the cached attribute in sync with the header.
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
+def normalize_language_name(document):
+    " Rename the document languages brazil and portuges to brazilian and portuguese. "
+    lang = { "brazil": "brazilian",
+             "portuges": "portuguese"}
+
+    if document.language in lang:
+        document.language = lang[document.language]
+        i = find_token(document.header, "\\language", 0)
+        # find_token returns -1 when the header has no \language line;
+        # indexing with it would silently overwrite the last header line.
+        if i != -1:
+            document.header[i] = "\\language %s" % document.language
+
+
+def revert_language_name(document):
+    " Rename the document languages brazilian and portuguese back to brazil and portuges. "
+    lang = { "brazilian": "brazil",
+             "portuguese": "portuges"}
+
+    if document.language in lang:
+        document.language = lang[document.language]
+        i = find_token(document.header, "\\language", 0)
+        # find_token returns -1 when the header has no \language line;
+        # indexing with it would silently overwrite the last header line.
+        if i != -1:
+            document.header[i] = "\\language %s" % document.language
+
+#
+#  \textclass cv -> \textclass simplecv
+def convert_cv_textclass(document):
+    " Rename the text class cv to simplecv. "
+    if document.textclass != "cv":
+        # Any other text class is left alone.
+        return
+    document.textclass = "simplecv"
+
+
+def revert_cv_textclass(document):
+    " Rename the text class simplecv back to cv. "
+    if document.textclass != "simplecv":
+        # Any other text class is left alone.
+        return
+    document.textclass = "cv"
+
+
+def convert_tableborder(document):
+    " Remove the '|' in front of '>{' from body lines that also contain leftline=\"true\". "
+    # The problem is: LyX doubles the table cell border as it ignores the "|" character in
+    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
+    i = 0
+    while i < len(document.body):
+        line = document.body[i]
+        h = line.find("leftline=\"true\"")
+        k = line.find("|>{")
+        # the two tokens have to be in one line
+        if (h != -1 and k != -1):
+            # delete only the "|"; the rest of the line, including its
+            # last character, has to stay untouched
+            document.body[i] = line[:k] + line[k+1:]
+        i = i + 1
+
+
+def revert_tableborder(document):
+    " Put a '|' in front of the first '>{' of body lines that also contain leftline=\"true\". "
+    for idx, text in enumerate(document.body):
+        # the two tokens have to be in one line
+        if text.find("leftline=\"true\"") == -1:
+            continue
+        pos = text.find(">{")
+        if pos == -1:
+            continue
+        # add the "|"
+        document.body[idx] = text[:pos] + '|' + text[pos:]
+
+
+def revert_armenian(document):
+    " Set armscii8 encoding to auto and replace the language armenian by english plus armtex. "
+    # set inputencoding from armscii8 to auto
+    if document.inputencoding == "armscii8":
+        pos = find_token(document.header, "\\inputencoding", 0)
+        if pos != -1:
+            document.header[pos] = "\\inputencoding auto"
+    # everything below only concerns armenian documents
+    if document.language != "armenian":
+        return
+    # the preamble counts as existing when any of its lines contains
+    # a backslash or a percent sign
+    preamble_exists = False
+    for text in document.preamble:
+        if text.find("\\") != -1 or text.find("%") != -1:
+            preamble_exists = True
+            break
+    if preamble_exists:
+        # set the armtex entry as the first preamble line
+        document.preamble.insert(0, "\\usepackage{armtex}")
+    else:
+        # create the preamble when it doesn't exist
+        document.preamble.append('\\usepackage{armtex}')
+    # Set document language from armenian to english
+    document.language = "english"
+    pos = find_token(document.header, "\\language", 0)
+    if pos != -1:
+        document.header[pos] = "\\language english"
+
+
+def revert_CJK(document):
+    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
+    cjk_encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
+                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
+    cjk_languages = ["chinese-simplified", "chinese-traditional",
+                     "japanese", "korean"]
+    pos = find_token(document.header, "\\inputencoding", 0)
+    if pos != -1:
+        # Only CJK encodings are rewritten; any other encoding is kept.
+        if get_value(document.header, "\\inputencoding", pos) in cjk_encodings:
+            document.header[pos] = "\\inputencoding default"
+    else:
+        # No \inputencoding line at all: add one with the value "auto".
+        document.header.append("\\inputencoding auto")
+    # Keep the cached attribute in sync with the header.
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+    if document.language not in cjk_languages:
+        return
+    document.language = "english"
+    pos = find_token(document.header, "\\language", 0)
+    if pos != -1:
+        document.header[pos] = "\\language english"
+
+
+def revert_preamble_listings_params(document):
+    r" Revert preamble option \listings_params "
+    i = find_token(document.header, "\\listings_params", 0)
+    if i != -1:
+        # Take everything after the keyword as the value: the parameter
+        # list may contain blanks, so it must not be cut at the first one.
+        parts = document.header[i].split(None, 1)
+        if len(parts) > 1:
+            params = parts[1].strip().strip('"')
+        else:
+            params = ''
+        document.preamble.append('\\usepackage{listings}')
+        document.preamble.append('\\lstset{%s}' % params)
+        document.header.pop(i)
+
+
+def revert_listings_inset(document):
+    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate 
+FROM
+
+\begin_inset listings
+lstparams "language=Delphi"
+inline true
+status open
+
+\begin_layout Standard
+var i = 10;
+\end_layout
+
+\end_inset
+
+TO
+
+\begin_inset ERT
+status open
+\begin_layout Standard
+
+
+\backslash
+lstinline[language=Delphi]{var i = 10;}
+\end_layout
+
+\end_inset
+
+There can be a caption inset in this inset
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+before label
+\begin_inset LatexCommand label
+name "lst:caption"
+
+\end_inset
+
+after label
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+'''
+    i = 0
+    while True:
+        i = find_token(document.body, '\\begin_inset listings', i)
+        if i == -1:
+            break
+        if not '\\usepackage{listings}' in document.preamble:
+            document.preamble.append('\\usepackage{listings}')
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            break
+        inline = 'false'
+        params = ''
+        status = 'open'
+        # k is the first line of the listing contents. It normally follows
+        # the status line; this fallback only avoids an undefined name when
+        # the inset has no status line.
+        k = i + 1
+        # first three lines
+        for line in range(i + 1, i + 4):
+            if document.body[line].startswith('inline'):
+                inline = document.body[line].split()[1]
+            if document.body[line].startswith('lstparams'):
+                # keep everything after the keyword, the parameters may
+                # contain blanks
+                params = document.body[line].split(None, 1)[1].strip().strip('"')
+            if document.body[line].startswith('status'):
+                status = document.body[line].split()[1].strip()
+                k = line + 1
+        # caption?
+        caption = ''
+        label = ''
+        # only look inside this inset, not in the rest of the document
+        cap = find_token(document.body, '\\begin_inset Caption', i, j)
+        if cap != -1:
+            cap_end = find_end_of_inset(document.body, cap + 1)
+            if cap_end == -1:
+                # this should not happen
+                break
+            # label? (only a label inside the caption counts)
+            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
+            if lbl != -1:
+                lbl_end = find_end_of_inset(document.body, lbl + 1)
+                if lbl_end == -1:
+                    # this should not happen
+                    break
+            else:
+                lbl = cap_end
+                lbl_end = cap_end
+            for line in document.body[lbl : lbl_end + 1]:
+                if line.startswith('name '):
+                    label = line.split()[1].strip('"')
+                    break
+            # the caption text is everything in the caption inset that is
+            # neither a LyX command line nor part of the label inset
+            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
+                if not line.startswith('\\'):
+                    caption += line.strip()
+            # the listing contents start after the caption
+            k = cap_end + 1
+        inlinecode = ''
+        # looking for the oneline code for lstinline
+        inlinecode = document.body[find_end_of_layout(document.body, 
+            find_token(document.body, '\\begin_layout Standard', i + 1) +1 ) - 1]
+        if len(caption) > 0:
+            if len(params) == 0:
+                params = 'caption={%s}' % caption
+            else:
+                params += ',caption={%s}' % caption
+        if len(label) > 0:
+            if len(params) == 0:
+                params = 'label={%s}' % label
+            else:
+                params += ',label={%s}' % label
+        if len(params) > 0:
+            params = '[%s]' % params
+            params = params.replace('\\', '\\backslash\n')
+        if inline == 'true':
+            document.body[i:(j+1)] = [r'\begin_inset ERT',
+                                      'status %s' % status,
+                                      r'\begin_layout Standard',
+                                      '', 
+                                      '',
+                                      r'\backslash',
+                                      'lstinline%s{%s}' % (params, inlinecode),
+                                      r'\end_layout',
+                                      '',
+                                      r'\end_inset']
+        else:
+            document.body[i: j+1] =  [r'\begin_inset ERT',
+                                      'status %s' % status,
+                                      '',
+                                      r'\begin_layout Standard',
+                                      '',
+                                      '',
+                                      r'\backslash',
+                                      r'begin{lstlisting}%s' % params,
+                                      r'\end_layout'
+                                    ] + document.body[k : j - 1] + \
+                                     ['',
+                                      r'\begin_layout Standard',
+                                      '',
+                                      r'\backslash',
+                                      'end{lstlisting}',
+                                      r'\end_layout',
+                                      '',
+                                      r'\end_inset']
+            
+
+def revert_include_listings(document):
+    r''' Revert lstinputlisting Include option , translate
+\begin_inset Include \lstinputlisting{file}[opt]
+preview false
+
+\end_inset
+
+TO
+
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+
+\backslash
+lstinputlisting[opt]{file}
+\end_layout
+
+\end_inset
+    '''
+
+    # \lstinputlisting{file}[options]; the file name may be any text
+    # without braces or backslashes (so paths with '/' or '-' are accepted)
+    include_re = re.compile(r'\\(lstinputlisting){([^{}\\]*)}(.*)')
+    i = 0
+    while True:
+        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
+        if i == -1:
+            break
+        if not '\\usepackage{listings}' in document.preamble:
+            document.preamble.append('\\usepackage{listings}')
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            break
+        # find command line lstinputlisting{file}[options]
+        cmd, filename, option = '', '', ''
+        # Everything after "\begin_inset Include" belongs to the command, so
+        # options containing blanks are not cut off at the first blank.
+        command = document.body[i].split(None, 2)[2].rstrip()
+        match = include_re.match(command)
+        if match:
+            cmd, filename, option = match.groups()
+        else:
+            document.warning("Malformed LyX document: Can't parse `%s'." % command)
+        option = option.replace('\\', '\\backslash\n')
+        # LaTeX wants the optional argument in front of the file name.
+        document.body[i : j + 1] = [r'\begin_inset ERT',
+                                    'status open',
+                                    '',
+                                    r'\begin_layout Standard',
+                                    '',
+                                    '',
+                                    r'\backslash',
+                                    '%s%s{%s}' % (cmd, option, filename),
+                                    r'\end_layout',
+                                    '',
+                                    r'\end_inset']
+
+
+def revert_ext_font_sizes(document):
+    " Move the font sizes 10, 11 and 12 of ext* classes from \\paperfontsize to \\options. "
+    if document.backend != "latex" or not document.textclass.startswith("ext"):
+        return
+
+    size = get_value(document.header, '\\paperfontsize', 0)
+    if size not in ('10', '11', '12'):
+        return
+    size_option = size + 'pt'
+
+    size_pos = find_token(document.header, '\\paperfontsize', 0)
+    document.header[size_pos] = '\\paperfontsize default'
+
+    options_pos = find_token(document.header, '\\options', 0)
+    if options_pos != -1:
+        # extend the existing class options
+        document.header[options_pos] += ',%s' % size_option
+    else:
+        # no \options yet: add the line right after \textclass
+        insert_at = find_token(document.header, '\\textclass', 0) + 1
+        document.header[insert_at:insert_at] = ['\\options %s' % size_option]
+
+
+def convert_ext_font_sizes(document):
+    " Move a 10pt, 11pt or 12pt class option of ext* classes from \\options to \\paperfontsize. "
+    if document.backend != "latex" or not document.textclass.startswith("ext"):
+        return
+    if get_value(document.header, '\\paperfontsize', 0) != 'default':
+        return
+
+    options_pos = find_token(document.header, '\\options', 0)
+    if options_pos == -1:
+        return
+
+    known_sizes = '10pt', '11pt', '12pt'
+    remaining = get_value(document.header, '\\options', options_pos).split(',')
+    # Take the first option that is exactly one of the known sizes.
+    for idx, opt in enumerate(remaining):
+        if opt in known_sizes:
+            size = opt[:-2]
+            del remaining[idx]
+            break
+    else: # this else will only be attained if the for cycle had no match
+        return
+
+    size_pos = find_token(document.header, '\\paperfontsize', 0)
+    document.header[size_pos] = '\\paperfontsize %s' % size
+
+    if remaining:
+        document.header[options_pos] = '\\options %s' % ','.join(remaining)
+    else:
+        # the font size was the only class option
+        del document.header[options_pos]
+
+
  ##
  # Conversion hub
  #
@@ -1195,9 +1675,35 @@ convert = [[246, []],
             [256, []],
             [257, [convert_caption]],
             [258, [convert_lyxline]],
-           [259, [convert_accent, normalize_font_whitespace]]]
-
-revert =  [[258, []],
+           [259, [convert_accent, normalize_font_whitespace]],
+           [260, []],
+           [261, [convert_changes]],
+           [262, []],
+           [263, [normalize_language_name]],
+           [264, [convert_cv_textclass]],
+           [265, [convert_tableborder]],
+           [266, []],
+           [267, []],
+           [268, []],
+           [269, []],
+           [270, []],
+           [271, [convert_ext_font_sizes]]
+          ]
+
+revert =  [
+           [270, [revert_ext_font_sizes]],
+           [269, [revert_beamer_alert, revert_beamer_structure]],
+           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
+           [267, [revert_CJK]],
+           [266, [revert_utf8plain]],
+           [265, [revert_armenian]],
+           [264, [revert_tableborder]],
+           [263, [revert_cv_textclass]],
+           [262, [revert_language_name]],
+           [261, [revert_ascii]],
+           [260, []],
+           [259, [revert_utf8x]],
+           [258, []],
             [257, []],
             [256, [revert_caption]],
             [255, [revert_encodings]],