From: José Matox Date: Wed, 2 Aug 2006 14:25:43 +0000 (+0000) Subject: Start to consolidate the functions in parser tools. X-Git-Tag: 1.6.10~12890 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=552a471c99a23d1e36921d078f6491d1c0230900;p=features.git Start to consolidate the functions in parser tools. Add a unit testing for parser tools functions. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14536 a592a061-630c-0410-9148-cb99ea01b6c8 --- diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index e459cedd48..c0c891f534 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -1,6 +1,6 @@ # This file is part of lyx2lyx -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2002-2004 Dekel Tsur , José Matos +# -*- coding: utf-8 -*- +# Copyright (C) 2002-2004 Dekel Tsur , José Matos # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -16,65 +16,93 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -import string -import re +" This modules offer several free functions to help parse lines." +# Utilities for one line def check_token(line, token): - if line[:len(token)] == token: - return 1 - return 0 + """ check_token(line, token) -> bool + Return True if token is present in line and is the first element + else returns False.""" + + return line[:len(token)] == token + + +def is_nonempty_line(line): + """ is_nonempty_line(line) -> bool + + Return False if line is either empty or it has only whitespaces, + else return True.""" + return line != " "*len(line) + + +# Utilities for a list of lines +def find_token(lines, token, start, end = 0, exact = False): + """ find_token(lines, token, start[[, end], exact]) -> int + + Return the lowest line where token is found, and is the first + element, in lines[start, end]. + + Return -1 on failure.""" -# We need to check that the char after the token is space, but I think -# we can ignore this -def find_token(lines, token, start, end = 0): if end == 0: end = len(lines) m = len(token) for i in xrange(start, end): - if lines[i][:m] == token: - return i + if exact: + x = lines[i].split() + y = token.split() + if len(x) < len(y): + continue + if x[:len(y)] == y: + return i + else: + if lines[i][:m] == token: + return i return -1 def find_token_exact(lines, token, start, end = 0): - if end == 0: - end = len(lines) - for i in xrange(start, end): - x = string.split(lines[i]) - y = string.split(token) - if len(x) < len(y): - continue - if x[:len(y)] == y: - return i - return -1 + return find_token(lines, token, start, end, True) + +def find_tokens(lines, tokens, start, end = 0, exact = False): + """ find_tokens(lines, tokens, start[[, end], exact]) -> int -def find_tokens(lines, tokens, start, end = 0): + Return the lowest line where one token in tokens is found, and is + the first element, in lines[start, end]. + + Return -1 on failure.""" if end == 0: end = len(lines) + for i in xrange(start, end): for token in tokens: - if lines[i][:len(token)] == token: - return i + if exact: + x = lines[i].split() + y = token.split() + if len(x) < len(y): + continue + if x[:len(y)] == y: + return i + else: + if lines[i][:len(token)] == token: + return i return -1 def find_tokens_exact(lines, tokens, start, end = 0): - if end == 0: - end = len(lines) - for i in xrange(start, end): - for token in tokens: - x = string.split(lines[i]) - y = string.split(token) - if len(x) < len(y): - continue - if x[:len(y)] == y: - return i - return -1 + return find_tokens(lines, tokens, start, end, True) def find_re(lines, rexp, start, end = 0): + """ find_token_re(lines, rexp, start[, end]) -> int + + Return the lowest line where rexp, a regular expression, is found + in lines[start, end]. + + Return -1 on failure.""" + if end == 0: end = len(lines) for i in xrange(start, end): @@ -84,6 +112,12 @@ def find_re(lines, rexp, start, end = 0): def find_token_backwards(lines, token, start): + """ find_token_backwards(lines, token, start) -> int + + Return the highest line where token is found, and is the first + element, in lines[start, end]. + + Return -1 on failure.""" m = len(token) for i in xrange(start, -1, -1): line = lines[i] @@ -93,6 +127,12 @@ def find_token_backwards(lines, token, start): def find_tokens_backwards(lines, tokens, start): + """ find_tokens_backwards(lines, token, start) -> int + + Return the highest line where token is found, and is the first + element, in lines[end, start]. + + Return -1 on failure.""" for i in xrange(start, -1, -1): line = lines[i] for token in tokens: @@ -102,16 +142,28 @@ def find_tokens_backwards(lines, tokens, start): def get_value(lines, token, start, end = 0): + """ get_value(lines, token, start[, end]) -> list of strings + + Return tokens after token for the first line, in lines, where + token is the first element.""" + i = find_token_exact(lines, token, start, end) if i == -1: return "" - if len(string.split(lines[i])) > 1: - return string.split(lines[i])[1] + if len(lines[i].split()) > 1: + return lines[i].split()[1] else: return "" def del_token(lines, token, start, end): + """ del_token(lines, token, start, end) -> int + + Find the lower line in lines where token is the first element and + delete that line. + + Returns the number of lines remaining.""" + k = find_token_exact(lines, token, start, end) if k == -1: return end @@ -120,12 +172,11 @@ def del_token(lines, token, start, end): return end - 1 -def find_end_of(lines, i, start_token, end_token): +def find_beginning_of(lines, i, start_token, end_token): count = 1 - n = len(lines) - while i < n: - i = find_tokens(lines, [end_token, start_token], i+1) - if check_token(lines[i], start_token): + while i > 0: + i = find_tokens_backwards(lines, [start_token, end_token], i-1) + if check_token(lines[i], end_token): count = count+1 else: count = count-1 @@ -134,12 +185,12 @@ def find_end_of(lines, i, start_token, end_token): return -1 -# Finds the matching \end_inset -def find_beginning_of(lines, i, start_token, end_token): +def find_end_of(lines, i, start_token, end_token): count = 1 - while i > 0: - i = find_tokens_backwards(lines, [start_token, end_token], i-1) - if check_token(lines[i], end_token): + n = len(lines) + while i < n: + i = find_tokens(lines, [end_token, start_token], i+1) + if check_token(lines[i], start_token): count = count+1 else: count = count-1 @@ -148,10 +199,6 @@ def find_beginning_of(lines, i, start_token, end_token): return -1 -def is_nonempty_line(line): - return line != " "*len(line) - - def find_nonempty_line(lines, start, end = 0): if end == 0: end = len(lines) diff --git a/lib/lyx2lyx/test_parser_tools.py b/lib/lyx2lyx/test_parser_tools.py new file mode 100644 index 0000000000..b1c84eea52 --- /dev/null +++ b/lib/lyx2lyx/test_parser_tools.py @@ -0,0 +1,93 @@ +# This file is part of lyx2lyx +# -*- coding: utf-8 -*- +# Copyright (C) 2006 José Matos +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +" This modules tests the functions used to help parse lines." + +from parser_tools import * + +import unittest + +ug = r""" +\begin_layout Standard +The +\begin_inset Quotes eld +\end_inset + + +\emph on +Introduction +\emph default + +\begin_inset Quotes erd +\end_inset + + describes several things in addition to LyX's philosophy: most importantly, + the format of all of the manuals. + If you don't read it, you'll have a bear of a time navigating this manual. + You might also be better served looking in one of the other manuals instead + of this one. + +\begin_inset Quotes eld +\end_inset + + +\emph on +Introduction +\emph default + +\begin_inset Quotes erd +\end_inset + + describes that, too. +\end_layout + +""" + +lines = ug.splitlines() + +class TestParserTools(unittest.TestCase): + + def test_check_token(self): + line = "\\begin_layout Standard" + + self.assertEquals(check_token(line, '\\begin_layout'), True) + self.assertEquals(check_token(line, 'Standard'), False) + + + def test_is_nonempty_line(self): + self.assertEquals(is_nonempty_line(lines[0]), False) + self.assertEquals(is_nonempty_line(lines[1]), True) + self.assertEquals(is_nonempty_line(" "*5), False) + + + def test_find_token(self): + self.assertEquals(find_token(lines, '\\emph', 0), 7) + self.assertEquals(find_token(lines, '\\emph', 0, 5), -1) + self.assertEquals(find_token(lines, '\\emp', 0, exact = True), -1) + self.assertEquals(find_token(lines, '\\emp', 0, exact = False), 7) + self.assertEquals(find_token(lines, 'emph', 0), -1) + + + def test_find_tokens(self): + tokens = ['\\emph', '\\end_inset'] + self.assertEquals(find_tokens(lines, tokens, 0), 4) + self.assertEquals(find_tokens(lines, tokens, 0, 4), -1) + + +if __name__ == '__main__': + unittest.main()