When cleaning up before quitting, take care of exceptions

[lyx.git] / lib / lyx2lyx / parser_tools.py
diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py

index c8fa4e1a17cc0e8b3521624e2eea6f8e29d29cd3..d208c06412f129f8676dcf427ef537b5b8c08987 100644 (file)
--- a/lib/lyx2lyx/parser_tools.py
+++ b/lib/lyx2lyx/parser_tools.py
@@ -1,6 +1,6 @@
  # This file is part of lyx2lyx
  # -*- coding: utf-8 -*-
-# Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>, 
+# Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>, 
  # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
  #
  # This program is free software; you can redistribute it and/or
@@ -15,31 +15,35 @@
  #
  # You should have received a copy of the GNU General Public License
  # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  
  
  ''' 
-This modules offer several free functions to help parse lines.
+This module offers several free functions to help parse lines.
  More documentation is below, but here is a quick guide to what 
  they do. Optional arguments are marked by brackets.
  
  find_token(lines, token, start[, end[, ignorews]]):
    Returns the first line i, start <= i < end, on which
    token is found at the beginning. Returns -1 if not 
-  found. If ignorews is (given and) True, then differences
-  in whitespace do not count.
+  found. 
+  If ignorews is (given and) True, then differences
+  in whitespace do not count, except that there must be no 
+  extra whitespace following token itself.
  
  find_token_exact(lines, token, start[, end]):
-  Badly named. As find_token, but with ignorews True.
+  As find_token, but with ignorews set to True.
  
  find_tokens(lines, tokens, start[, end[, ignorews]]):
    Returns the first line i, start <= i < end, on which
-  oen of the tokens in tokens is found at the beginning. 
-  Returns -1 if not found. If ignorews is (given and) True, 
-  then differences in whitespace do not count.
+  one of the tokens in tokens is found at the beginning. 
+  Returns -1 if not found. 
+  If ignorews is (given and) True, then differences
+  in whitespace do not count, except that there must be no 
+  extra whitespace following token itself.
  
  find_tokens_exact(lines, token, start[, end]):
-  Badly named. As find_tokens, but with ignorews True.
+  As find_tokens, but with ignorews True.
    
  find_token_backwards(lines, token, start):
  find_tokens_backwards(lines, tokens, start):
@@ -52,29 +56,37 @@ find_re(lines, rexp, start[, end]):
  get_value(lines, token, start[, end[, default]]):
    Similar to find_token, but it returns what follows the 
    token on the found line. Example:
-    get_value(document.header, "\use_xetex", 0)
+    get_value(document.header, "\\use_xetex", 0)
    will find a line like:
-    \use_xetex true
+    \\use_xetex true
    and, in that case, return "true". (Note that whitespace
    is stripped.) The final argument, default, defaults to "", 
    and is what is returned if we do not find anything. So you
    can use that to set a default.
    
-get_quoted_value(lines, token, start[, end[, default]):
+get_quoted_value(lines, token, start[, end[, default]]):
    Similar to get_value, but it will strip quotes off the
    value, if they are present. So use this one for cases
    where the value is normally quoted.
  
+get_option_value(line, option):
+  This assumes we have a line with something like:
+      option="value"
+  and returns value. Returns "" if not found.
+
+get_bool_value(lines, token, start[, end[, default]]):
+  Like get_value, but returns a boolean.
+
  del_token(lines, token, start[, end]):
    Like find_token, but deletes the line if it finds one.
    Returns True if a line got deleted, otherwise False.
  
  find_beginning_of(lines, i, start_token, end_token):
    Here, start_token and end_token are meant to be a matching 
-  pair, like "\begin_layout" and "\end_layout". We look for 
+  pair, like "\\begin_layout" and "\\end_layout". We look for 
    the start_token that pairs with the end_token that occurs
    on or after line i. Returns -1 if not found.
-  So, in the layout case, this would find the \begin_layout 
+  So, in the layout case, this would find the \\begin_layout 
    for the layout line i is in. 
    Example:
      ec = find_token(document.body, "</cell", i)
@@ -95,6 +107,12 @@ find_end_of_inset(lines, i):
  find_end_of_layout(lines, i):
    Specialization of find_end_of for layouts.
  
+find_end_of_sequence(lines, i):
+  Find the end of the sequence of layouts of the same kind.
+  Considers nesting. If the last paragraph in sequence is nested,
+  the position of the last \\end_deeper is returned, else
+  the position of the last \\end_layout.
+
  is_in_inset(lines, i, inset):
    Checks if line i is in an inset of the given type.
    If so, returns starting and ending lines. Otherwise, 
@@ -110,7 +128,7 @@ is_in_inset(lines, i, inset):
  
  get_containing_inset(lines, i):
    Finds out what kind of inset line i is within. Returns a 
-  list containing what follows \begin_inset on the the line 
+  list containing what follows \\begin_inset on the line 
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    So get_containing_inset(document.body, i) might return:
@@ -119,8 +137,8 @@ get_containing_inset(lines, i):
    on line 306.
  
  get_containing_layout(lines, i):
-  As get_containing_inset, but for layout.
-
+  As get_containing_inset, but for layout. Additionally returns the
+  position of the real paragraph start (after par params) as 4th value.
  
  find_nonempty_line(lines, start[, end]):
    Finds the next non-empty line.
@@ -131,8 +149,13 @@ check_token(line, token):
  is_nonempty_line(line):
    Does line contain something besides whitespace?
  
+count_pars_in_inset(lines, i):
+  Counts the paragraphs inside an inset.
+
  '''
  
+import re
+
  # Utilities for one line
  def check_token(line, token):
      """ check_token(line, token) -> bool
@@ -159,14 +182,15 @@ def find_token(lines, token, start, end = 0, ignorews = False):
      element, in lines[start, end].
      
      If ignorews is True (default is False), then differences in
-    whitespace are ignored.
+    whitespace are ignored, except that there must be no extra
+    whitespace following token itself.
  
      Return -1 on failure."""
  
      if end == 0 or end > len(lines):
          end = len(lines)
      m = len(token)
-    for i in xrange(start, end):
+    for i in range(start, end):
          if ignorews:
              x = lines[i].split()
              y = token.split()
@@ -194,7 +218,7 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
      if end == 0 or end > len(lines):
          end = len(lines)
  
-    for i in xrange(start, end):
+    for i in range(start, end):
          for token in tokens:
              if ignorews:
                  x = lines[i].split()
@@ -223,7 +247,7 @@ def find_re(lines, rexp, start, end = 0):
  
      if end == 0 or end > len(lines):
          end = len(lines)
-    for i in xrange(start, end):
+    for i in range(start, end):
          if rexp.match(lines[i]):
                  return i
      return -1
@@ -237,7 +261,7 @@ def find_token_backwards(lines, token, start):
  
      Return -1 on failure."""
      m = len(token)
-    for i in xrange(start, -1, -1):
+    for i in range(start, -1, -1):
          line = lines[i]
          if line[:m] == token:
              return i
@@ -251,7 +275,7 @@ def find_tokens_backwards(lines, tokens, start):
      element, in lines[end, start].
  
      Return -1 on failure."""
-    for i in xrange(start, -1, -1):
+    for i in range(start, -1, -1):
          line = lines[i]
          for token in tokens:
              if line[:len(token)] == token:
@@ -294,6 +318,43 @@ def get_quoted_value(lines, token, start, end = 0, default = ""):
      return val.strip('"')
  
  
+def get_bool_value(lines, token, start, end = 0, default = None):
+    """ get_bool_value(lines, token, start[[, end], default]) -> bool
+
+    Find the next line that looks like:
+      token bool_value
+
+    Returns True if bool_value is 1 or true and
+    False if bool_value is 0 or false; otherwise returns default.
+    """
+
+    val = get_quoted_value(lines, token, start, end, "")
+
+    if val == "1" or val == "true":
+        return True
+    if val == "0" or val == "false":
+        return False
+    return default
+
+
+def get_option_value(line, option):
+    rx = option + '\s*=\s*"([^"]+)"'
+    rx = re.compile(rx)
+    m = rx.search(line)
+    if not m:
+      return ""
+    return m.group(1)
+
+
+def set_option_value(line, option, value):
+    rx = '(' + option + '\s*=\s*")[^"]+"'
+    rx = re.compile(rx)
+    m = rx.search(line)
+    if not m:
+        return line
+    return re.sub(rx, '\g<1>' + value + '"', line)
+
+
  def del_token(lines, token, start, end = 0):
      """ del_token(lines, token, start, end) -> int
  
@@ -342,7 +403,7 @@ def find_end_of(lines, i, start_token, end_token):
  def find_nonempty_line(lines, start, end = 0):
      if end == 0:
          end = len(lines)
-    for i in xrange(start, end):
+    for i in range(start, end):
          if is_nonempty_line(lines[i]):
              return i
      return -1
@@ -386,16 +447,23 @@ def is_in_inset(lines, i, inset):
  def get_containing_inset(lines, i):
    ''' 
    Finds out what kind of inset line i is within. Returns a 
-  list containing (i) what follows \begin_inset on the the line 
+  list containing what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    '''
-  stins = find_token_backwards(lines, i, "\\begin_inset")
-  if stins == -1:
-      return False
-  endins = find_end_of_inset(lines, stins)
+  j = i
+  while True:
+      stins = find_token_backwards(lines, "\\begin_inset", j)
+      if stins == -1:
+          return False
+      endins = find_end_of_inset(lines, stins)
+      if endins > j:
+          break
+      j = stins - 1
+
    if endins < i:
        return False
+
    inset = get_value(lines, "\\begin_inset", stins)
    if inset == "":
        # shouldn't happen
@@ -406,18 +474,83 @@ def get_containing_inset(lines, i):
  def get_containing_layout(lines, i):
    ''' 
    Finds out what kind of layout line i is within. Returns a 
-  list containing (i) what follows \begin_layout on the the line 
-  on which the layout begins, plus the starting and ending line.
+  list containing what follows \\begin_layout on the line
+  on which the layout begins, plus the starting and ending line
+  and the start of the paragraph (after all params). I.e., returns:
+    (layoutname, layoutstart, layoutend, startofcontent)
    Returns False on any kind of error.
    '''
-  stins = find_token_backwards(lines, i, "\\begin_layout")
-  if stins == -1:
-      return False
-  endins = find_end_of_layout(lines, stins)
-  if endins < i:
+  j = i
+  while True:
+      stlay = find_token_backwards(lines, "\\begin_layout", j)
+      if stlay == -1:
+          return False
+      endlay = find_end_of_layout(lines, stlay)
+      if endlay > i:
+          break
+      j = stlay - 1
+
+  if endlay < i:
        return False
-  lay = get_value(lines, "\\begin_layout", stins)
+
+  lay = get_value(lines, "\\begin_layout", stlay)
    if lay == "":
        # shouldn't happen
        return False
-  return (lay, stins, endins)
+  par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
+                "\\start_of_appendix", "\\paragraph_spacing", "\\align",
+                "\\labelwidthstring"]
+  stpar = stlay
+  while True:
+      stpar += 1
+      if lines[stpar].split(' ', 1)[0] not in par_params:
+          break
+  return (lay, stlay, endlay, stpar)
+
+
+def count_pars_in_inset(lines, i):
+  '''
+  Counts the paragraphs within this inset
+  '''
+  ins = get_containing_inset(lines, i)
+  if ins == -1:
+      return -1
+  pars = 0
+  for j in range(ins[1], ins[2]):
+      m = re.match(r'\\begin_layout (.*)', lines[j])
+      if m and get_containing_inset(lines, j)[0] == ins[0]:
+          pars += 1
+
+  return pars
+
+
+def find_end_of_sequence(lines, i):
+  '''
+  Returns the end of a sequence of identical layouts.
+  '''
+  lay = get_containing_layout(lines, i)
+  if lay == False:
+      return -1
+  layout = lay[0]
+  endlay = lay[2]
+  i = endlay
+  while True:
+      m = re.match(r'\\begin_layout (.*)', lines[i])
+      if m and m.group(1) != layout:
+          return endlay
+      elif lines[i] == "\\begin_deeper":
+          j = find_end_of(lines, i, "\\begin_deeper", "\\end_deeper")
+          if j != -1:
+              i = j
+              endlay = j
+              continue
+      if m and m.group(1) == layout:
+          endlay = find_end_of_layout(lines, i)
+          i = endlay
+          continue
+      if i == len(lines) - 1:
+          break
+      i = i + 1
+
+  return endlay
+