X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx_1_6.py;h=7a3710e47e46f1f3e9be95d77afc47a90ec424b7;hb=c52808ce1a804a875514713eb6f5dcd6f05dd6c8;hp=8a33eaace795b63508bd4aea6a506df195dc17d2;hpb=d7c31dfff3d647b6b16942f0f8c001966a470a4c;p=features.git

diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py
index 8a33eaace7..7a3710e47e 100644
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -14,7 +14,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 """ Convert files to the file format generated by lyx 1.6"""
 
@@ -22,11 +22,28 @@ import re
 import unicodedata
 import sys, os
 
-from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
+from parser_tools import find_token, find_end_of, find_tokens, get_value
+from unicode_symbols import unicode_reps
 
 ####################################################################
 # Private helper functions
 
+
+def get_value_string(lines, token, start, end = 0, trim = False, default = ""):
+    """ get_value_string(lines, token, start[[, end], trim, default]) -> string
+
+    Return the value get_value() finds for token in lines (start and end are
+    passed through to it), or default when that value is empty. When trim is
+    true, the first and last characters of the value are dropped."""
+
+    val = get_value(lines, token, start, end, "")
+    if not val:
+        return default
+    if trim:
+        return val[1:-1]
+    return val
+
+
 def find_end_of_inset(lines, i):
     " Find end of inset, where lines[i] is included."
     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
@@ -35,7 +52,7 @@ def find_end_of_inset(lines, i):
 # DO NOT do this:
 #   document.body[i] = wrap_insert_ert(...)
 # wrap_into_ert may returns a multiline string, which should NOT appear
-# in document.body. Insetad, do something like this:
+# in document.body. Instead, do something like this:
 #   subst = wrap_inset_ert(...)
 #   subst = subst.split('\n')
 #   document.body[i:i+1] = subst
@@ -78,7 +95,7 @@ def convert_len(len):
              "theight%":"\\backslash\ntextheight", "pheight%":"\\backslash\npageheight"}
 
     # Convert LyX units to LaTeX units
-    for unit in units.keys():
+    for unit in list(units.keys()):
         if len.find(unit) != -1:
             len = '%f' % (len2value(len) / 100)
             len = len.strip('0') + units[unit]
@@ -129,51 +146,13 @@ def set_option(document, m, option, value):
     return l
 
 
-def read_unicodesymbols():
-    " Read the unicodesymbols list of unicode characters and corresponding commands."
-    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
-    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
-    spec_chars = []
-    # Two backslashes, followed by some non-word character, and then a character
-    # in brackets. The idea is to check for constructs like: \"{u}, which is how
-    # they are written in the unicodesymbols file; but they can also be written
-    # as: \"u or even \" u.
-    r = re.compile(r'\\\\(\W)\{(\w)\}')
-    for line in fp.readlines():
-        if line[0] != '#' and line.strip() != "":
-            line=line.replace(' "',' ') # remove all quotation marks with spaces before
-            line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
-            try:
-                [ucs4,command,dead] = line.split(None,2)
-                if command[0:1] != "\\":
-                    continue
-                spec_chars.append([command, unichr(eval(ucs4))])
-            except:
-                continue
-            m = r.match(command)
-            if m != None:
-                command = "\\\\"
-                # If the character is a double-quote, then we need to escape it, too,
-                # since it is done that way in the LyX file.
-                if m.group(1) == "\"":
-                    command += "\\"
-                commandbl = command
-                command += m.group(1) + m.group(2)
-                commandbl += m.group(1) + ' ' + m.group(2)
-                spec_chars.append([command, unichr(eval(ucs4))])
-                spec_chars.append([commandbl, unichr(eval(ucs4))])
-    fp.close()
-    return spec_chars
-
-
 def extract_argument(line):
     'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
 
     if not line:
         return (None, "")
 
-    bracere = re.compile("(\s*)(.*)")
+    bracere = re.compile(r"(\s*)(.*)")
     n = bracere.match(line)
     whitespace = n.group(1)
     stuff = n.group(2)
@@ -253,8 +232,6 @@ def latex2ert(line, isindex):
     return retval
 
 
-unicode_reps = read_unicodesymbols()
-
 #Bug 5022....
 #Might should do latex2ert first, then deal with stuff that DOESN'T
 #end up inside ERT. That routine could be modified so that it returned
@@ -300,7 +277,7 @@ def latex2lyx(data, isindex):
     data = data.replace('\\\\', '\\')
 
     # Math:
-    mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
+    mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
     lines = data.split('\n')
     for line in lines:
         #document.warning("LINE: " + line)
@@ -407,7 +384,7 @@ def lyx2latex(document, lines):
             continue
         inert = ert_end >= curline
         content += lyxline2latex(document, lines[curline], inert)
-      
+
     return content
 
 
@@ -422,6 +399,7 @@ def convert_ltcaption(document):
         j = find_end_of_inset(document.body, i + 1)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
             continue
 
         nrows = int(document.body[i+1].split('"')[3])
@@ -513,6 +491,7 @@ def convert_tablines(document):
         j = find_end_of_inset(document.body, i + 1)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
             continue
 
         m = i + 1
@@ -572,9 +551,10 @@ def revert_tablines(document):
         i = find_token(document.body, "\\begin_inset Tabular", i)
         if i == -1:
             return
-        j = find_end_of_inset(document.body, i + 1)
+        j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
             continue
 
         m = i + 1
@@ -651,6 +631,7 @@ def fix_wrong_tables(document):
         j = find_end_of_inset(document.body, i + 1)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
             continue
 
         m = i + 1
@@ -773,7 +754,7 @@ def convert_flex(document):
         document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
 
 def revert_flex(document):
-    "Convert Flex to CharStyle"
+    "Revert Flex to CharStyle"
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset Flex", i)
@@ -945,7 +926,7 @@ def revert_pdf_options(document):
 def remove_inzip_options(document):
     "Remove inzipName and embed options from the Graphics inset"
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset Graphics", i)
         if i == -1:
             return
@@ -953,6 +934,8 @@ def remove_inzip_options(document):
         if j == -1:
             # should not happen
             document.warning("Malformed LyX document: Could not find end of graphics inset.")
+            i += 1
+            continue
         # If there's a inzip param, just remove that
         k = find_token(document.body, "\tinzipName", i + 1, j)
         if k != -1:
@@ -963,7 +946,7 @@ def remove_inzip_options(document):
 
 
 def convert_inset_command(document):
-    """
+    r"""
         Convert:
             \begin_inset LatexCommand cmd
         to
@@ -971,7 +954,7 @@ def convert_inset_command(document):
             LatexCommand cmd
     """
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset LatexCommand", i)
         if i == -1:
             return
@@ -1000,7 +983,7 @@ def convert_inset_command(document):
 
 
 def revert_inset_command(document):
-    """
+    r"""
         Convert:
             \begin_inset CommandInset InsetType
             LatexCommand cmd
@@ -1010,7 +993,7 @@ def revert_inset_command(document):
         will not be able to recognize. Not sure what to do about that.
     """
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset CommandInset", i)
         if i == -1:
             return
@@ -1080,7 +1063,7 @@ def revert_wrapfig_options(document):
 
 
 def convert_latexcommand_index(document):
-    "Convert from LatexCommand form to collapsable form."
+    "Convert from LatexCommand form to collapsible form."
     i = 0
     r1 = re.compile('name "(.*)"')
     while True:
@@ -1112,7 +1095,7 @@ def convert_latexcommand_index(document):
 
 
 def revert_latexcommand_index(document):
-    "Revert from collapsable form to LatexCommand form."
+    "Revert from collapsible form to LatexCommand form."
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset Index", i)
@@ -1233,7 +1216,7 @@ def revert_japanese_encoding(document):
 def revert_inset_info(document):
     'Replace info inset with its content'
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, '\\begin_inset Info', i)
         if i == -1:
             return
@@ -1241,6 +1224,8 @@ def revert_inset_info(document):
         if j == -1:
             # should not happen
             document.warning("Malformed LyX document: Could not find end of Info inset.")
+            i += 1
+            continue
         type = 'unknown'
         arg = ''
         for k in range(i, j+1):
@@ -1573,10 +1558,10 @@ def convert_usorbian(document):
 
 
 def convert_macro_global(document):
-    "Remove TeX code command \global when it is in front of a macro"
+    r"Remove TeX code command \global when it is in front of a macro"
     # math macros are nowadays already defined \global, so that an additional
     # \global would make the document uncompilable, see
-    # http://bugzilla.lyx.org/show_bug.cgi?id=5371
+    # http://www.lyx.org/trac/ticket/5371
     # We're looking for something like this:
     # \begin_inset ERT
     # status collapsed
@@ -1712,7 +1697,7 @@ def convert_serbocroatian(document):
 def convert_framed_notes(document):
     "Convert framed notes to boxes. "
     i = 0
-    while 1:
+    while True:
         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
         if i == -1:
             return
@@ -1741,7 +1726,7 @@ def convert_module_names(document):
     return
   newmodlist = []
   for mod in modlist:
-    if modulemap.has_key(mod):
+    if mod in modulemap:
       newmodlist.append(modulemap[mod])
     else:
       document.warning("Can't find module %s in the module map!" % mod)
@@ -1760,7 +1745,7 @@ def revert_module_names(document):
     return
   newmodlist = []
   for mod in modlist:
-    if modulemap.has_key(mod):
+    if mod in modulemap:
       newmodlist.append(modulemap[mod])
     else:
       document.warning("Can't find module %s in the module map!" % mod)
@@ -1789,7 +1774,7 @@ def revert_colsep(document):
 def revert_framed_notes(document):
     "Revert framed boxes to notes. "
     i = 0
-    while 1:
+    while True:
         i = find_tokens(document.body, ["\\begin_inset Box Framed", "\\begin_inset Box Shaded"], i)
 
         if i == -1:
@@ -1798,19 +1783,24 @@ def revert_framed_notes(document):
         if j == -1:
             # should not happen
             document.warning("Malformed LyX document: Could not find end of Box inset.")
+            i += 1
+            continue
         k = find_token(document.body, "status", i + 1, j)
         if k == -1:
             document.warning("Malformed LyX document: Missing `status' tag in Box inset.")
-            return
+            i = j
+            continue
         status = document.body[k]
         l = find_default_layout(document, i + 1, j)
         if l == -1:
             document.warning("Malformed LyX document: Missing `\\begin_layout' in Box inset.")
-            return
+            i = j
+            continue
         m = find_token(document.body, "\\end_layout", i + 1, j)
         if m == -1:
             document.warning("Malformed LyX document: Missing `\\end_layout' in Box inset.")
-            return
+            i = j
+            continue
         ibox = find_token(document.body, "has_inner_box 1", i + 1, k)
         pbox = find_token(document.body, "use_parbox 1", i + 1, k)
         if ibox == -1 and pbox == -1:
@@ -1873,7 +1863,8 @@ def revert_nobreakdash(document):
             j = find_token(document.header, "\\use_amsmath", 0)
             if j == -1:
                 document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
-                return
+                i += 1
+                continue
             document.header[j] = "\\use_amsmath 2"
         else:
             i = i + 1
@@ -1901,7 +1892,7 @@ def revert_nocite_key(body, start, end):
 def revert_nocite(document):
     "Revert LatexCommand nocite to ERT"
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
         if i == -1:
             return
@@ -2019,7 +2010,7 @@ def revert_serbianlatin(document):
 def revert_rotfloat(document):
     " Revert sideways custom floats. "
     i = 0
-    while 1:
+    while True:
         # whitespace intended (exclude \\begin_inset FloatList)
         i = find_token(document.body, "\\begin_inset Float ", i)
         if i == -1:
@@ -2047,7 +2038,8 @@ def revert_rotfloat(document):
         l = find_default_layout(document, i + 1, j)
         if l == -1:
             document.warning("Malformed LyX document: Missing `\\begin_layout' in Float inset.")
-            return
+            i = j
+            continue
         subst = ['\\begin_layout Standard',
                   '\\begin_inset ERT',
                   'status collapsed', '',
@@ -2081,7 +2073,7 @@ def revert_rotfloat(document):
 def revert_widesideways(document):
     " Revert wide sideways floats. "
     i = 0
-    while 1:
+    while True:
         # whitespace intended (exclude \\begin_inset FloatList)
         i = find_token(document.body, '\\begin_inset Float ', i)
         if i == -1:
@@ -2109,7 +2101,8 @@ def revert_widesideways(document):
         l = find_default_layout(document, i + 1, j)
         if l == -1:
             document.warning("Malformed LyX document: Missing `\\begin_layout' in Float inset.")
-            return
+            i = j
+            continue
         subst = ['\\begin_layout Standard', '\\begin_inset ERT',
                   'status collapsed', '',
                   '\\begin_layout Standard', '', '', '\\backslash',
@@ -2132,7 +2125,7 @@ def revert_widesideways(document):
 def revert_inset_embedding(document, type):
     ' Remove embed tag from certain type of insets'
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset %s" % type, i)
         if i == -1:
             return
@@ -2157,7 +2150,7 @@ def revert_external_embedding(document):
 def convert_subfig(document):
     " Convert subfigures to subfloats. "
     i = 0
-    while 1:
+    while True:
         addedLines = 0
         i = find_token(document.body, '\\begin_inset Graphics', i)
         if i == -1:
@@ -2197,7 +2190,7 @@ def convert_subfig(document):
 def revert_subfig(document):
     " Revert subfloats. "
     i = 0
-    while 1:
+    while True:
         # whitespace intended (exclude \\begin_inset FloatList)
         i = find_tokens(document.body, ['\\begin_inset Float ', '\\begin_inset Wrap'], i)
         if i == -1:
@@ -2268,7 +2261,7 @@ def revert_subfig(document):
                 if opt != -1:
                     optend = find_end_of_inset(document.body, opt)
                     if optend == -1:
-                        document.warning("Malformed lyx document: Missing '\\end_inset' (OptArg).")
+                        document.warning("Malformed LyX document: Missing '\\end_inset' (OptArg).")
                         return
                     optc = find_default_layout(document, opt, optend)
                     if optc == -1:
@@ -2346,7 +2339,7 @@ def revert_wrapplacement(document):
 
 
 def remove_extra_embedded_files(document):
-    " Remove \extra_embedded_files from buffer params "
+    r" Remove \extra_embedded_files from buffer params "
     i = find_token(document.header, '\\extra_embedded_files', 0)
     if i == -1:
         return
@@ -2378,6 +2371,7 @@ def revert_spaceinset(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of space inset.")
+            i += 1
             continue
         document.body[i] = document.body[i].replace('\\begin_inset Space', '\\InsetSpace')
         del document.body[j]
@@ -2480,6 +2474,7 @@ def revert_protected_hfill(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of space inset.")
+            i += 1
             continue
         del document.body[j]
         subst = document.body[i].replace('\\begin_inset Space \\hspace*{\\fill}', \
@@ -2501,6 +2496,7 @@ def revert_leftarrowfill(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of space inset.")
+            i += 1
             continue
         del document.body[j]
         subst = document.body[i].replace('\\begin_inset Space \\leftarrowfill{}', \
@@ -2522,6 +2518,7 @@ def revert_rightarrowfill(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of space inset.")
+            i += 1
             continue
         del document.body[j]
         subst = document.body[i].replace('\\begin_inset Space \\rightarrowfill{}', \
@@ -2543,6 +2540,7 @@ def revert_upbracefill(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of space inset.")
+            i += 1
             continue
         del document.body[j]
         subst = document.body[i].replace('\\begin_inset Space \\upbracefill{}', \
@@ -2564,6 +2562,7 @@ def revert_downbracefill(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of space inset.")
+            i += 1
             continue
         del document.body[j]
         subst = document.body[i].replace('\\begin_inset Space \\downbracefill{}', \
@@ -2631,6 +2630,7 @@ def revert_pagebreaks(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of Newpage inset.")
+            i += 1
             continue
         del document.body[j]
         document.body[i] = document.body[i].replace('\\begin_inset Newpage newpage', '\\newpage')
@@ -2667,6 +2667,7 @@ def revert_linebreaks(document):
         j = find_end_of_inset(document.body, i)
         if j == -1:
             document.warning("Malformed LyX document: Could not find end of Newline inset.")
+            i += 1
             continue
         del document.body[j]
         document.body[i] = document.body[i].replace('\\begin_inset Newline newline', '\\newline')
@@ -2693,7 +2694,7 @@ def convert_japanese_plain(document):
 def revert_pdfpages(document):
     ' Revert pdfpages external inset to ERT '
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset External", i)
         if i == -1:
             return
@@ -2795,7 +2796,7 @@ def revert_master(document):
 def revert_graphics_group(document):
     ' Revert group information from graphics insets '
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_inset Graphics", i)
         if i == -1:
             return
@@ -2825,7 +2826,7 @@ def update_apa_styles(document):
                     "Paragraph*":      "Paragraph",
                     "Subparagraph*":   "Subparagraph"}
     i = 0
-    while 1:
+    while True:
         i = find_token(document.body, "\\begin_layout", i)
         if i == -1:
             return
@@ -2839,7 +2840,7 @@ def update_apa_styles(document):
 
 def convert_paper_sizes(document):
     ' exchange size options legalpaper and executivepaper to correct order '
-    # routine is needed to fix http://bugzilla.lyx.org/show_bug.cgi?id=4868
+    # routine is needed to fix http://www.lyx.org/trac/ticket/4868
     i = 0
     j = 0
     i = find_token(document.header, "\\papersize executivepaper", 0)