German UserGuide.lyx: updates by Hartmut

[lyx.git] / lib / lyx2lyx / lyx_1_6.py
diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py

index 11349ed64f99736f56d0bc3e492c8f5283a54c7d..07f69236a843fcf3e4c46ead68ef7d0d20382034 100644 (file)
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -43,10 +43,18 @@ def find_end_of_inset(lines, i):
  # where the last statement resets the counter to accord with the added
  # lines.
  def wrap_into_ert(string, src, dst):
-    " Wrap a something into an ERT"
+    '''Within string, replace occurrences of src with dst, wrapped into ERT
+       E.g.: wrap_into_ert('sch\"on', "\\", "\\backslash") is:
+       sch<ERT>\\backslash</ERT>"on'''
      return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n'
        + dst + '\n\\end_layout\n\\end_inset\n')
  
+def put_cmd_in_ert(string):
+    string = string.replace('\\', "\\backslash\n")
+    string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
+      + string + "\n\\end_layout\n\\end_inset"
+    return string
+
  def add_to_preamble(document, text):
      """ Add text to the preamble if it is not already there.
      Only the first line is checked!"""
@@ -125,12 +133,14 @@ def read_unicodesymbols():
      # as: \"u.
      r = re.compile(r'\\\\(\W)\{(\w)\}')
      for line in fp.readlines():
-        if line[0] != '#' and line.strip() != "" and line.find("\\") != -1:
+        if line[0] != '#' and line.strip() != "":
              line=line.replace(' "',' ') # remove all quotation marks with spaces before
              line=line.replace('" ',' ') # remove all quotation marks with spaces after
              line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
              try:
                  [ucs4,command,dead] = line.split(None,2)
+                if command[0:1] != "\\":
+                    continue
                  spec_chars.append([command, unichr(eval(ucs4))])
              except:
                  continue
@@ -147,18 +157,84 @@ def read_unicodesymbols():
      return spec_chars
  
  
-def line2lyx(line):
-    '''Converts LaTeX commands, such as: \"u, to unicode characters, and
-       escapes backslashes, etc, into ERT. line may well be a multi-line
-       string when it is returned.
-       NOTE: If we want to convert \label{} into an InsetLabel, then this
-       is the place to do it.'''
+def extract_argument(line):
+    'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
+
+    if not line:
+        return (None, "")
+
+    bracere = re.compile("(\s*)(.*)")
+    n = bracere.match(line)
+    whitespace = n.group(1)
+    stuff = n.group(2)
+    brace = stuff[:1]
+    if brace != "[" and brace != "{":
+        return (None, line)
+
+    # find closing brace
+    remain = stuff[1:]
+    pos = 0
+    num = 1
+    term = "}"
+    if brace == "[":
+        term = "]"
+    skip = False
+    for c in remain:
+        if skip:
+            skip = False
+        elif c == "\\":
+            skip = True
+        elif c == brace:
+            num += 1
+        elif c == term:
+            num -= 1
+        if c == 0:
+            break
+        pos += 1
+    if num != 0:
+        # We never found the matching brace
+        # So, to be on the safe side, let's just return everything
+        # which will then get wrapped as ERT
+        return (line, "")
+    return (line[:pos + 1], line[pos + 1:])
+
+
+def latex2ert(line):
+    '''Converts LaTeX commands into ERT. line may well be a multi-line
+       string when it is returned.'''
      if not line:
          return line
-    line = wrap_into_ert(line, '\\', '\\backslash')
-    line = wrap_into_ert(line, '{', '{')
-    line = wrap_into_ert(line, '}', '}')
-    return line
+
+    retval = ""
+    ## FIXME Escaped \ ??
+    # This regex looks for a LaTeX command---i.e., something of the form
+    # "\alPhaStuFF", or "\X", where X is any character---where the command
+    # may also be preceded by an additional backslash, which is how it would
+    # appear (e.g.) in an InsetIndex.
+    labelre = re.compile(r'(.*?)\\?(\\(?:[a-zA-Z]+|.))(.*)')
+
+    m = labelre.match(line)
+    while m != None:
+        retval += m.group(1)
+        cmd = m.group(2)
+        end = m.group(3)
+
+        while True:
+            (arg, rest) = extract_argument(end)
+            if arg == None:
+                break
+            cmd += arg
+            end = rest
+        # If we wanted to put labels into an InsetLabel, for example, then we
+        # would just need to test here for cmd == "label" and then take some
+        # appropriate action, i.e., to use arg to get the content and then 
+        # wrap it appropriately.
+        cmd = put_cmd_in_ert(cmd)
+        retval += "\n" + cmd + "\n"
+        line = end
+        m = labelre.match(line)
+    retval += line
+    return retval
  
  
  def latex2lyx(data):
@@ -166,21 +242,36 @@ def latex2lyx(data):
      converting LaTeX constructs into LyX constructs. Returns a list of
      lines, suitable for insertion into document.body.'''
  
-    mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
      retval = []
  
      # Convert LaTeX to Unicode
      reps = read_unicodesymbols()
+    # Commands of this sort need to be checked to make sure they are
+    # followed by a non-alpha character, lest we replace too much.
+    hardone = re.compile(r'^\\\\[a-zA-Z]+$')
+
      for rep in reps:
-        try:
+        if hardone.match(rep[0]):
+            pos = 0
+            while True:
+                pos = data.find(rep[0], pos)
+                if pos == -1:
+                    break
+                nextpos = pos + len(rep[0])
+                if nextpos < len(data) and data[nextpos].isalpha():
+                    # not the end of that command
+                    pos = nextpos
+                    continue
+                data = data[:pos] + rep[1] + data[nextpos:]
+                pos = nextpos
+        else:
              data = data.replace(rep[0], rep[1])
-        except:
-            # There seems to be a character in the unicodesymbols file
-            # that causes problems, namely, 0x2109.
-            pass
+
      # Generic, \" -> ":
      data = wrap_into_ert(data, r'\"', '"')
+
      # Math:
+    mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
      lines = data.split('\n')
      for line in lines:
          #document.warning("LINE: " + line)
@@ -194,19 +285,86 @@ def latex2lyx(data):
              g = m.group(3)
              if s:
                  # this is non-math!
-                s = line2lyx(s)
+                s = latex2ert(s)
                  subst = s.split('\n')
                  retval += subst
              retval.append("\\begin_inset Formula " + f)
              retval.append("\\end_inset")
              m = mathre.match(g)
          # Handle whatever is left, which is just text
-        g = line2lyx(g)
+        g = latex2ert(g)
          subst = g.split('\n')
          retval += subst
      return retval
  
  
+def lyx2latex(lines):
+    'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
+    # clean up multiline stuff
+    content = ""
+    ert_end = 0
+    reps = read_unicodesymbols()
+  
+    for curline in range(len(lines)):
+      line = lines[curline]
+      if line.startswith("\\begin_inset ERT"):
+          # We don't want to replace things inside ERT, so figure out
+          # where the end of the inset is.
+          ert_end = find_end_of_inset(lines, curline + 1)
+          continue
+      elif line.startswith("\\begin_inset Formula"):
+          line = line[20:]
+      elif line.startswith("\\begin_inset Quotes"):
+          # For now, we do a very basic reversion. Someone who understands
+          # quotes is welcome to fix it up.
+          qtype = line[20:].strip()
+          # lang = qtype[0]
+          side = qtype[1]
+          dbls = qtype[2]
+          if side == "l":
+              if dbls == "d":
+                  line = "``"
+              else:
+                  line = "`"
+          else:
+              if dbls == "d":
+                  line = "''"
+              else:
+                  line = "'"
+      elif line.isspace() or \
+            line.startswith("\\begin_layout") or \
+            line.startswith("\\end_layout") or \
+            line.startswith("\\begin_inset") or \
+            line.startswith("\\end_inset") or \
+            line.startswith("\\lang") or \
+            line.strip() == "status collapsed" or \
+            line.strip() == "status open":
+          #skip all that stuff
+          continue
+  
+      # a lossless reversion is not possible
+      # try at least to handle some common insets and settings
+      # do not replace inside ERTs
+      if ert_end >= curline:
+          line = line.replace(r'\backslash', r'\\')
+      else:
+          # Do the LyX text --> LaTeX conversion
+          for rep in reps:
+            line = line.replace(rep[1], rep[0])
+          line = line.replace(r'\backslash', r'\textbackslash{}')
+          line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
+          line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
+          line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
+          line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
+          line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
+          line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
+          line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
+          line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
+          line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
+      content += line
+    return content
+
+
  ####################################################################
  
  def convert_ltcaption(document):
@@ -913,58 +1071,13 @@ def revert_latexcommand_index(document):
          j = find_end_of_inset(document.body, i + 1)
          if j == -1:
            return
-        del document.body[j - 1]
-        del document.body[j - 2] # \end_layout
-        document.body[i] =  "\\begin_inset CommandInset index"
-        document.body[i + 1] =  "LatexCommand index"
-        # clean up multiline stuff
-        content = ""
-        ert_end = 0
-        for k in range(i + 3, j - 2):
-          line = document.body[k]
-          if line.startswith("\\begin_inset ERT"):
-              ert_end = find_end_of_inset(document.body, k + 1)
-              line = line[16:]
-          if line.startswith("\\begin_inset Formula"):
-            line = line[20:]
-          if line.startswith("\\begin_layout Standard"):
-            line = line[22:]
-          if line.startswith("\\begin_layout Plain Layout"):
-            line = line[26:]
-          if line.startswith("\\end_layout"):
-            line = line[11:]
-          if line.startswith("\\end_inset"):
-            line = line[10:]
-          if line.startswith("status collapsed"):
-            line = line[16:]
-          if line.startswith("status open"):
-            line = line[11:]
-          # a lossless reversion is not possible
-          # try at least to handle some common insets and settings
-          # do not replace inside ERTs
-          if ert_end < k:
-              # Do the LyX text --> LaTeX conversion
-              for rep in replacements:
-                line = line.replace(rep[1], rep[0])
-              line = line.replace(r'\backslash', r'\textbackslash{}')
-              line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
-              line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
-              line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
-              line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
-              line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
-              line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
-              line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
-              line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
-              line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
-          else:
-              line = line.replace(r'\backslash', r'\\')
-          content = content + line;
-        document.body[i + 3] = "name " + '"' + content + '"'
-        for k in range(i + 4, j - 2):
-          del document.body[i + 4]
-        document.body.insert(i + 4, "")
-        del document.body[i + 2] # \begin_layout standard
-        i = i + 5
+
+        content = lyx2latex(document.body[i:j])
+        # escape quotes
+        content = content.replace('"', r'\"')
+        document.body[i:j] = ["\\begin_inset CommandInset index", "LatexCommand index",
+            "name " + '"' + content + '"', ""]
+        i += 5
  
  
  def revert_wraptable(document):
@@ -996,6 +1109,24 @@ def revert_vietnamese(document):
          j = j + 1
  
  
+def convert_japanese_cjk(document):
+    "Set language japanese to japanese-cjk"
+    # Set document language from japanese to japanese-cjk
+    i = 0
+    if document.language == "japanese":
+        document.language = "japanese-cjk"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language japanese-cjk"
+    j = 0
+    while True:
+        j = find_token(document.body, "\\lang japanese", j)
+        if j == -1:
+            return
+        document.body[j] = document.body[j].replace("\\lang japanese", "\\lang japanese-cjk")
+        j = j + 1
+
+
  def revert_japanese(document):
      "Set language japanese-plain to japanese"
      # Set document language from japanese-plain to japanese
@@ -1014,6 +1145,24 @@ def revert_japanese(document):
          j = j + 1
  
  
+def revert_japanese_cjk(document):
+    "Set language japanese-cjk to japanese"
+    # Set document language from japanese-cjk to japanese
+    i = 0
+    if document.language == "japanese-cjk":
+        document.language = "japanese"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language japanese"
+    j = 0
+    while True:
+        j = find_token(document.body, "\\lang japanese-cjk", j)
+        if j == -1:
+            return
+        document.body[j] = document.body[j].replace("\\lang japanese-cjk", "\\lang japanese")
+        j = j + 1
+
+
  def revert_japanese_encoding(document):
      "Set input encoding form EUC-JP-plain to EUC-JP etc."
      # Set input encoding form EUC-JP-plain to EUC-JP etc.
@@ -2731,7 +2880,7 @@ convert = [[277, [fix_wrong_tables]],
             [289, [convert_latexcommand_index]],
             [290, []],
             [291, []],
-           [292, []],
+           [292, [convert_japanese_cjk]],
             [293, []],
             [294, [convert_pdf_options]],
             [295, [convert_htmlurl, convert_url]],
@@ -2826,7 +2975,7 @@ revert =  [[337, [revert_polytonicgreek]],
             [294, [revert_href, revert_url]],
             [293, [revert_pdf_options_2]],
             [292, [revert_inset_info]],
-           [291, [revert_japanese, revert_japanese_encoding]],
+           [291, [revert_japanese, revert_japanese_encoding, revert_japanese_cjk]],
             [290, [revert_vietnamese]],
             [289, [revert_wraptable]],
             [288, [revert_latexcommand_index]],