When cleaning up before quitting, take care of exceptions

[lyx.git] / lib / lyx2lyx / parser_tools.py
diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py

index 9af9b60a7e5397cfb6258bae118169336b3f939f..d208c06412f129f8676dcf427ef537b5b8c08987 100644 (file)
--- a/lib/lyx2lyx/parser_tools.py
+++ b/lib/lyx2lyx/parser_tools.py
@@ -19,7 +19,7 @@
  
  
  ''' 
-This modules offer several free functions to help parse lines.
+This module offers several free functions to help parse lines.
  More documentaton is below, but here is a quick guide to what 
  they do. Optional arguments are marked by brackets.
  
@@ -32,11 +32,11 @@ find_token(lines, token, start[, end[, ignorews]]):
    extra whitespace following token itself.
  
  find_token_exact(lines, token, start[, end]):
-  As find_token, but with ignorews True.
+  As find_token, but with ignorews set to True.
  
  find_tokens(lines, tokens, start[, end[, ignorews]]):
    Returns the first line i, start <= i < end, on which
-  oen of the tokens in tokens is found at the beginning. 
+  one of the tokens in tokens is found at the beginning. 
    Returns -1 if not found. 
    If ignorews is (given and) True, then differences
    in whitespace do not count, except that there must be no 
@@ -56,15 +56,15 @@ find_re(lines, rexp, start[, end]):
  get_value(lines, token, start[, end[, default]):
    Similar to find_token, but it returns what follows the 
    token on the found line. Example:
-    get_value(document.header, "\use_xetex", 0)
+    get_value(document.header, "\\use_xetex", 0)
    will find a line like:
-    \use_xetex true
+    \\use_xetex true
    and, in that case, return "true". (Note that whitespace
    is stripped.) The final argument, default, defaults to "", 
    and is what is returned if we do not find anything. So you
    can use that to set a default.
    
-get_quoted_value(lines, token, start[, end[, default]):
+get_quoted_value(lines, token, start[, end[, default]]):
    Similar to get_value, but it will strip quotes off the
    value, if they are present. So use this one for cases
    where the value is normally quoted.
@@ -74,16 +74,19 @@ get_option_value(line, option):
        option="value"
    and returns value. Returns "" if not found.
  
+get_bool_value(lines, token, start[, end[, default]]):
+  Like get_value, but returns a boolean.
+
  del_token(lines, token, start[, end]):
    Like find_token, but deletes the line if it finds one.
    Returns True if a line got deleted, otherwise False.
  
  find_beginning_of(lines, i, start_token, end_token):
    Here, start_token and end_token are meant to be a matching 
-  pair, like "\begin_layout" and "\end_layout". We look for 
+  pair, like "\\begin_layout" and "\\end_layout". We look for 
    the start_token that pairs with the end_token that occurs
    on or after line i. Returns -1 if not found.
-  So, in the layout case, this would find the \begin_layout 
+  So, in the layout case, this would find the \\begin_layout 
    for the layout line i is in. 
    Example:
      ec = find_token(document.body, "</cell", i)
@@ -106,7 +109,9 @@ find_end_of_layout(lines, i):
  
  find_end_of_sequence(lines, i):
    Find the end of the sequence of layouts of the same kind.
-  Considers nesting.
+  Considers nesting. If the last paragraph in sequence is nested,
+  the position of the last \\end_deeper is returned, else
+  the position of the last \\end_layout.
  
  is_in_inset(lines, i, inset):
    Checks if line i is in an inset of the given type.
@@ -123,7 +128,7 @@ is_in_inset(lines, i, inset):
  
  get_containing_inset(lines, i):
    Finds out what kind of inset line i is within. Returns a 
-  list containing what follows \begin_inset on the the line 
+  list containing what follows \\begin_inset on the line 
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    So get_containing_inset(document.body, i) might return:
@@ -135,7 +140,6 @@ get_containing_layout(lines, i):
    As get_containing_inset, but for layout. Additionally returns the
    position of real paragraph start (after par params) as 4th value.
  
-
  find_nonempty_line(lines, start[, end):
    Finds the next non-empty line.
  
@@ -186,7 +190,7 @@ def find_token(lines, token, start, end = 0, ignorews = False):
      if end == 0 or end > len(lines):
          end = len(lines)
      m = len(token)
-    for i in xrange(start, end):
+    for i in range(start, end):
          if ignorews:
              x = lines[i].split()
              y = token.split()
@@ -214,7 +218,7 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
      if end == 0 or end > len(lines):
          end = len(lines)
  
-    for i in xrange(start, end):
+    for i in range(start, end):
          for token in tokens:
              if ignorews:
                  x = lines[i].split()
@@ -243,7 +247,7 @@ def find_re(lines, rexp, start, end = 0):
  
      if end == 0 or end > len(lines):
          end = len(lines)
-    for i in xrange(start, end):
+    for i in range(start, end):
          if rexp.match(lines[i]):
                  return i
      return -1
@@ -257,7 +261,7 @@ def find_token_backwards(lines, token, start):
  
      Return -1 on failure."""
      m = len(token)
-    for i in xrange(start, -1, -1):
+    for i in range(start, -1, -1):
          line = lines[i]
          if line[:m] == token:
              return i
@@ -271,7 +275,7 @@ def find_tokens_backwards(lines, tokens, start):
      element, in lines[end, start].
  
      Return -1 on failure."""
-    for i in xrange(start, -1, -1):
+    for i in range(start, -1, -1):
          line = lines[i]
          for token in tokens:
              if line[:len(token)] == token:
@@ -314,6 +318,25 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
      return val.strip('"')
  
  
+def get_bool_value(lines, token, start, end = 0, default = None):
+    """ get_bool_value(lines, token, start[, end[, default]]) -> bool
+
+    Find the next line that looks like:
+      token bool_value
+
+    Returns True if bool_value is 1 or true and
+    False if bool_value is 0 or false
+    """
+
+    val = get_quoted_value(lines, token, start, end, "")
+
+    if val == "1" or val == "true":
+        return True
+    if val == "0" or val == "false":
+        return False
+    return default
+
+
  def get_option_value(line, option):
      rx = option + '\s*=\s*"([^"]+)"'
      rx = re.compile(rx)
@@ -380,7 +403,7 @@ def find_end_of(lines, i, start_token, end_token):
  def find_nonempty_line(lines, start, end = 0):
      if end == 0:
          end = len(lines)
-    for i in xrange(start, end):
+    for i in range(start, end):
          if is_nonempty_line(lines[i]):
              return i
      return -1
@@ -424,7 +447,7 @@ def is_in_inset(lines, i, inset):
  def get_containing_inset(lines, i):
    ''' 
    Finds out what kind of inset line i is within. Returns a 
-  list containing (i) what follows \begin_inset on the the line 
+  list containing (i) what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    '''
@@ -437,7 +460,10 @@ def get_containing_inset(lines, i):
        if endins > j:
            break
        j = stins - 1
-  
+
+  if endins < i:
+      return False
+
    inset = get_value(lines, "\\begin_inset", stins)
    if inset == "":
        # shouldn't happen
@@ -448,9 +474,10 @@ def get_containing_inset(lines, i):
  def get_containing_layout(lines, i):
    ''' 
    Finds out what kind of layout line i is within. Returns a 
-  list containing (i) what follows \begin_layout on the the line 
+  list containing what follows \\begin_layout on the line
    on which the layout begins, plus the starting and ending line
-  and the start of the apargraph (after all params).
+  and the start of the paragraph (after all params). I.e., returns:
+    (layoutname, layoutstart, layoutend, startofcontent)
    Returns False on any kind of error.
    '''
    j = i
@@ -462,25 +489,27 @@ def get_containing_layout(lines, i):
        if endlay > i:
            break
        j = stlay - 1
-  
+
+  if endlay < i:
+      return False
+
    lay = get_value(lines, "\\begin_layout", stlay)
    if lay == "":
        # shouldn't happen
        return False
    par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
-                "\\start_of_appendix", "\\paragraph_spacing single",
-                "\\paragraph_spacing onehalf", "\\paragraph_spacing double",
-                "\\paragraph_spacing other", "\\align", "\\labelwidthstring"]
+                "\\start_of_appendix", "\\paragraph_spacing", "\\align",
+                "\\labelwidthstring"]
    stpar = stlay
    while True:
        stpar += 1
-      if lines[stpar] not in par_params:
+      if lines[stpar].split(' ', 1)[0] not in par_params:
            break
    return (lay, stlay, endlay, stpar)
  
  
  def count_pars_in_inset(lines, i):
-  ''' 
+  '''
    Counts the paragraphs within this inset
    '''
    ins = get_containing_inset(lines, i)
@@ -491,12 +520,12 @@ def count_pars_in_inset(lines, i):
        m = re.match(r'\\begin_layout (.*)', lines[j])
        if m and get_containing_inset(lines, j)[0] == ins[0]:
            pars += 1
-  
+
    return pars
  
  
  def find_end_of_sequence(lines, i):
-  ''' 
+  '''
    Returns the end of a sequence of identical layouts.
    '''
    lay = get_containing_layout(lines, i)
@@ -513,6 +542,7 @@ def find_end_of_sequence(lines, i):
            j = find_end_of(lines, i, "\\begin_deeper", "\\end_deeper")
            if j != -1:
                i = j
+              endlay = j
                continue
        if m and m.group(1) == layout:
            endlay = find_end_of_layout(lines, i)