# insert_to_preamble, latex_length, revert_flex_inset,
# revert_font_attrs, hex2ratio, str2bool
-from parser_tools import (find_end_of_inset, find_end_of_layout,
- find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
- get_containing_layout, get_value, check_token)
+from parser_tools import (del_complete_lines,
+ find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
+ find_token, find_token_backwards, get_containing_layout,
+ get_value, check_token)
####################################################################
# Private helper functions
if document.backend != "latex":
return
+ lines = document.body
i = 0
- while i < len(document.body):
- words = document.body[i].split()
+ while i+1 < len(lines):
+ i += 1
+ line = lines[i]
+ words = line.split()
if (len(words) > 1 and words[0] == "\\begin_inset"
and (words[1] in ["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]
- or ' '.join(words[1:]) == "Flex Code")):
+ or line.endswith("Flex Code"))):
# must not replace anything in insets that store LaTeX contents in .lyx files
# (math and command insets without overridden read() and write() methods
# filtering out IPA makes Text::readParToken() more simple
# skip ERT as well since it is not needed there
# Flex Code is logical markup, typically rendered as typewriter
- j = find_end_of_inset(document.body, i)
+ j = find_end_of_inset(lines, i)
if j == -1:
- document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
- i += 1
+ document.warning("Malformed LyX document: Can't find end of " +
+ words[1] + " inset at line " + str(i))
else:
i = j
continue
- if document.body[i] == "\\begin_layout LyX-Code":
- j = find_end_of_layout(document.body, i)
+ if lines[i] == "\\begin_layout LyX-Code":
+ j = find_end_of_layout(lines, i)
if j == -1:
document.warning("Malformed LyX document: "
"Can't find end of %s layout at line %d" % (words[1],i))
- i += 1
else:
i = j
continue
-
- if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
- # skip paragraph parameters (bug 10243)
- i += 1
+ if line.startswith("\\labelwidthstring"):
+ # skip label width string (bug 10243)
continue
- while True:
- j = document.body[i].find("--")
- if j == -1:
- break
- front = document.body[i][:j]
- back = document.body[i][j+2:]
- # We can have an arbitrary number of consecutive hyphens.
- # These must be split into the corresponding number of two and three hyphens
- # We must match what LaTeX does: First try emdash, then endash, then single hyphen
- if back.find("-") == 0:
- back = back[1:]
- if len(back) > 0:
- document.body.insert(i+1, back)
- document.body[i] = front + "\\threehyphens"
- else:
- if len(back) > 0:
- document.body.insert(i+1, back)
- document.body[i] = front + "\\twohyphens"
- i += 1
- i = 0
- while i < len(document.body):
- line = document.body[i]
- while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
- document.body[i+1].startswith("-")):
- line = line.replace(r"\SpecialChar \textcompwordmark{}",
- document.body.pop(i+1))
- document.body[i] = line
- i += 1
+ if "--" in line:
+ # We can have an arbitrary number of consecutive hyphens.
+ # Replace as LaTeX does: First try emdash, then endash
+ line = line.replace("---", "\\threehyphens\n")
+ line = line.replace("--", "\\twohyphens\n")
+ lines[i:i+1] = line.splitlines()
+
+ # remove ligature breaks between dashes
+ i = 1
+ while i < len(lines):
+ line = lines[i]
+ if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
+ lines[i+1].startswith("-")):
+ lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
+ lines.pop(i+1))
+ else:
+ i += 1
-# Return number of the next line to check for dashes.
-def _dashes_next_line(document, i):
- i +=1
- words = document.body[i].split()
- # skip paragraph parameters (bug 10243):
- if words and words[0] in ["\\leftindent", "\\paragraph_spacing",
- "\\align", "\\labelwidthstring"]:
- i += 1
- words = document.body[i].split()
- # some insets should be skipped in revert_dashes (cf. convert_dashes)
- if (len(words) > 1 and words[0] == "\\begin_inset" and
- words[1] in ["CommandInset", "ERT", "External", "Formula",
- "FormulaMacro", "Graphics", "IPA", "listings"]):
- j = find_end_of_inset(document.body, i)
- if j == -1:
- document.warning("Malformed LyX document: Can't find end of "
- + words[1] + " inset at line " + str(i))
- return i
- return j+1
- return i
def revert_dashes(document):
"""
Prevent ligatures of existing --- and --.
- Convert \\twohyphens and \\threehyphens to -- and ---.
+ Revert \\twohyphens and \\threehyphens to -- and ---.
Remove preamble code from 2.3->2.2 conversion.
"""
- # Remove preamble code from 2.3->2.2 conversion:
- dash_renew_lines = find_slice(document.preamble,
- ['% Added by lyx2lyx',
- r'\renewcommand{\textendash}{--}',
- r'\renewcommand{\textemdash}{---}'])
- del(document.preamble[dash_renew_lines])
- # Prevent ligation of hyphens:
+ del_complete_lines(document.preamble,
+ ['% Added by lyx2lyx',
+ r'\renewcommand{\textendash}{--}',
+ r'\renewcommand{\textemdash}{---}'])
+ # Insert ligature breaks to prevent ligation of hyphens to dashes:
+ lines = document.body
i = 0
- while i < len(document.body)-1:
- # increment i, skip some insets (cf. convert_dashes)
- i = _dashes_next_line(document, i)
- line = document.body[i]
+ while i+1 < len(lines):
+ i += 1
+ line = lines[i]
+ # skip label width string (bug 10243):
+ if line.startswith("\\labelwidthstring"):
+ continue
+        # do not touch hyphens in some insets (cf. convert_dashes):
+        if line.startswith("\\begin_inset"):
+            # bind the inset type once: it is needed both for the
+            # skip-list test and for the warning message below
+            try:
+                itype = line.split()[1]
+            except IndexError:
+                # "\begin_inset" without a type: nothing to skip
+                continue
+            if itype in ["CommandInset", "ERT", "External",
+                         "Formula", "FormulaMacro", "Graphics",
+                         "IPA", "listings"]:
+                j = find_end_of_inset(lines, i)
+                if j == -1:
+                    document.warning("Malformed LyX document: Can't find "
+                                     "end of %s inset at line %d." % (itype, i))
+                    continue
+                i = j
if "--" in line:
line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
document.body[i:i+1] = line.split('\n')
- # Convert \twohyphens and \threehyphens:
- i = 0
- while i < len(document.body):
- # skip some insets (see convert_dashes())
- i = _dashes_next_line(document, i-1)
- replaced = False
- if document.body[i].find("\\twohyphens") >= 0:
- document.body[i] = document.body[i].replace("\\twohyphens", "--")
- replaced = True
- if document.body[i].find("\\threehyphens") >= 0:
- document.body[i] = document.body[i].replace("\\threehyphens", "---")
- replaced = True
- if replaced and i+1 < len(document.body) and \
- (document.body[i+1].find("\\") != 0 or \
- document.body[i+1].find("\\twohyphens") == 0 or
- document.body[i+1].find("\\threehyphens") == 0) and \
- len(document.body[i]) + len(document.body[i+1]) <= 80:
- document.body[i] = document.body[i] + document.body[i+1]
- document.body[i+1:i+2] = []
+ # Revert \twohyphens and \threehyphens:
+ i = 1
+ while i < len(lines):
+ line = lines[i]
+ if not line.endswith("hyphens"):
+ i +=1
+ elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
+ line = line.replace("\\twohyphens", "--")
+ line = line.replace("\\threehyphens", "---")
+ lines[i] = line + lines.pop(i+1)
else:
i += 1
document.language = "english"
i = find_token(document.header, "\\language georgian", 0)
if i != -1:
- document.header[i] = "\\language english"
+ document.header[i] = "\\language english"
j = find_token(document.header, "\\language_package default", 0)
if j != -1:
- document.header[j] = "\\language_package babel"
+ document.header[j] = "\\language_package babel"
k = find_token(document.header, "\\options", 0)
if k != -1:
- document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
+ document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
else:
- l = find_token(document.header, "\\use_default_options", 0)
- document.header.insert(l + 1, "\\options georgian")
+ l = find_token(document.header, "\\use_default_options", 0)
+ document.header.insert(l + 1, "\\options georgian")
def revert_sigplan_doi(document):
# Uncomment only what you need to import, please.
-from parser_tools import del_token, find_end_of, find_end_of_layout, \
- find_end_of_inset, find_re, find_slice, find_token, \
- find_token_backwards, get_containing_layout, \
+from parser_tools import del_token, del_value, del_complete_lines, \
+ find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
+ find_token, find_token_backwards, get_containing_layout, \
get_bool_value, get_value, get_quoted_value
# find_tokens, find_token_exact, is_in_inset, \
# check_token, get_option_value
"Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
"footcitet", "footcitep", "footcitealt", "footcitealp",\
"footciteauthor", "footciteyear", "footciteyearpar",\
- "citefield", "citetitle", "cite*" ]
+ "citefield", "citetitle", "cite*" ]
i = 0
while (True):
def convert_dashligatures(document):
"Set 'use_dash_ligatures' according to content."
- use_dash_ligatures = None
- # Eventually remove preamble code from 2.3->2.2 conversion:
- dash_renew_lines = find_slice(document.preamble,
- ['% Added by lyx2lyx',
- r'\renewcommand{\textendash}{--}',
- r'\renewcommand{\textemdash}{---}'])
- del(document.preamble[dash_renew_lines])
- use_dash_ligatures = bool(dash_renew_lines.stop)
+ # Look for and remove dashligatures workaround from 2.3->2.2 reversion,
+ # set use_dash_ligatures to True if found, to None else.
+ use_dash_ligatures = del_complete_lines(document.preamble,
+ ['% Added by lyx2lyx',
+ r'\renewcommand{\textendash}{--}',
+ r'\renewcommand{\textemdash}{---}']) or None
if use_dash_ligatures is None:
- # Look for dashes:
- # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
- # as interim representation for dash ligatures)
+ # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
+ # or "\threehyphens\n" as interim representation for -- an ---.)
has_literal_dashes = False
has_ligature_dashes = False
j = 0
# Skip some document parts where dashes are not converted
if (i < j) or line.startswith("\\labelwidthstring"):
continue
- words = line.split()
- if (len(words) > 1 and words[0] == "\\begin_inset"
- and (words[1] in ["CommandInset", "ERT", "External", "Formula",
- "FormulaMacro", "Graphics", "IPA", "listings"]
- or ' '.join(words[1:]) == "Flex Code")):
- j = find_end_of_inset(document.body, i)
- if j == -1:
- document.warning("Malformed LyX document: "
- "Can't find end of %s inset at line %d" % (words[1],i))
- continue
+        if line.startswith("\\begin_inset"):
+            # bind the inset type once: it is needed both for the
+            # skip-list test and for the warning message below
+            try:
+                itype = line.split()[1]
+            except IndexError:
+                continue
+            if (itype in ["CommandInset", "ERT", "External", "Formula",
+                          "FormulaMacro", "Graphics", "IPA", "listings"]
+                or line.endswith("Flex Code")):
+                j = find_end_of_inset(document.body, i)
+                if j == -1:
+                    document.warning("Malformed LyX document: Can't "
+                                     "find end of %s inset at line %d." % (itype, i))
+                    continue
if line == "\\begin_layout LyX-Code":
j = find_end_of_layout(document.body, i)
if j == -1:
use_dash_ligatures = True
# insert the setting if there is a preferred value
if use_dash_ligatures is not None:
- i = find_token(document.header, "\\use_microtype", 0)
- if i != -1:
- document.header.insert(i+1, "\\use_dash_ligatures %s"
- % str(use_dash_ligatures).lower())
+ i = find_token(document.header, "\\graphics")
+ document.header.insert(i, "\\use_dash_ligatures %s"
+ % str(use_dash_ligatures).lower())
def revert_dashligatures(document):
"""Remove font ligature settings for en- and em-dashes.
Revert conversion of \twodashes or \threedashes to literal dashes."""
- i = find_token(document.header, "\\use_dash_ligatures", 0)
- if i == -1:
- return
- use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
- del document.header[i]
- if not use_dash_ligatures or document.backend != "latex":
+ use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
+ if use_dash_ligatures != "true" or document.backend != "latex":
return
-
j = 0
new_body = []
for i, line in enumerate(document.body):
else:
k = find_token(document.header, "\\options", 0)
if k != -1:
- document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
- del document.header[i]
+ document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
+ del document.header[i]
else:
l = find_token(document.header, "\\use_default_options", 0)
document.header.insert(l, "\\options fleqn")
import re
-# Fast search in lists
-def find_slice(l, sl, start = 0, stop = None):
- """Return position of first occurence of sequence `sl` in list `l`
- as a `slice` object.
-
- >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
- slice(0, 2, None)
-
- The return value can be used to delete or substitute the sub-list:
-
- >>> l = [1, 0, 1, 1, 1, 2]
- >>> s = find_slice(l, [0, 1, 1])
- >>> del(l[s]); l
- [1, 1, 2]
- >>> s = find_slice(l, (1, 2))
- >>> l[s] = [3]; l
- [1, 3]
-
- The start argument works similar to list.index()
-
- >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
- slice(4, 6, None)
-
- Use the `stop` attribute of the returned `slice` to test for success:
-
- >>> s1 = find_slice([2, 3, 1], (3, 1))
- >>> s2 = find_slice([2, 3, 1], (2, 1))
- >>> if s1.stop and not s2.stop:
- ... print "wow"
- wow
- """
- stop = stop or len(l)
- N = len(sl) # lenght of sub-list
- try:
- while True:
- for j, value in enumerate(sl):
- i = l.index(value, start, stop)
- if j and i != start:
- start = i-j
- break
- start = i +1
- else:
- return slice(i+1-N, i+1)
- except ValueError: # sub list `sl` not found
- return slice(0, 0)
-
-
# Utilities for one line
def check_token(line, token):
""" check_token(line, token) -> bool
Deprecated. Use line.startswith(token).
"""
-
return line.startswith(token)
# Utilities for a list of lines
-def find_token(lines, token, start, end = 0, ignorews = False):
+def find_token(lines, token, start=0, end=0, ignorews=False):
""" find_token(lines, token, start[[, end], ignorews]) -> int
Return the lowest line where token is found, and is the first
element, in lines[start, end].
If ignorews is True (default is False), then differences in
- whitespace are ignored, except that there must be no extra
- whitespace following token itself.
+ whitespace are ignored, but there must be whitespace following
+ token itself.
Return -1 on failure."""
if end == 0 or end > len(lines):
end = len(lines)
- m = len(token)
+ if ignorews:
+ y = token.split()
for i in range(start, end):
if ignorews:
x = lines[i].split()
- y = token.split()
if len(x) < len(y):
continue
if x[:len(y)] == y:
return i
else:
- if lines[i][:m] == token:
+ if lines[i].startswith(token):
return i
return -1
-def find_token_exact(lines, token, start, end = 0):
+def find_token_exact(lines, token, start=0, end=0):
return find_token(lines, token, start, end, True)
-def find_tokens(lines, tokens, start, end = 0, ignorews = False):
+def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
""" find_tokens(lines, tokens, start[[, end], ignorews]) -> int
Return the lowest line where one token in tokens is found, and is
if x[:len(y)] == y:
return i
else:
- if lines[i][:len(token)] == token:
+ if lines[i].startswith(token):
return i
return -1
-def find_tokens_exact(lines, tokens, start, end = 0):
+def find_tokens_exact(lines, tokens, start=0, end=0):
return find_tokens(lines, tokens, start, end, True)
-def find_re(lines, rexp, start, end = 0):
- """ find_token_re(lines, rexp, start[, end]) -> int
+def find_re(lines, rexp, start=0, end=0):
+ """ find_re(lines, rexp, start[, end]) -> int
Return the lowest line where rexp, a regular expression, is found
in lines[start, end].
element, in lines[start, end].
Return -1 on failure."""
- m = len(token)
for i in range(start, -1, -1):
- line = lines[i]
- if line[:m] == token:
+ if lines[i].startswith(token):
return i
return -1
for i in range(start, -1, -1):
line = lines[i]
for token in tokens:
- if line[:len(token)] == token:
+ if line.startswith(token):
+ return i
+ return -1
+
+
+def find_complete_lines(lines, sublines, start=0, end=0):
+ """Find first occurence of sequence `sublines` in list `lines`.
+ Return index of first line or -1 on failure.
+
+ Efficient search for a sub-list in a large list. Works for any values.
+
+ >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2])
+ 0
+
+ The `start` and `end` arguments work similar to list.index()
+
+ >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1)
+ 4
+ >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4)
+ -1
+
+ The return value can be used to substitute the sub-list.
+ Take care to check before use:
+
+ >>> l = [1, 1, 2]
+ >>> s = find_complete_lines(l, [1, 2])
+ >>> if s != -1:
+ ... l[s:s+2] = [3]; l
+ [1, 3]
+
+ See also del_complete_lines().
+ """
+ if not sublines:
+ return start
+ end = end or len(lines)
+ N = len(sublines)
+ try:
+ while True:
+ for j, value in enumerate(sublines):
+ i = lines.index(value, start, end)
+ if j and i != start:
+ start = i-j
+ break
+ start = i + 1
+ else:
+ return i +1 - N
+ except ValueError: # `sublines` not found
+ return -1
+
+
+def find_across_lines(lines, sub, start=0, end=0):
+    """Find `sub` (which may contain line breaks) in lines[start:end].
+    Return the index of the line where the match begins, -1 on failure."""
+    sublines = sub.splitlines()
+    if len(sublines) > 2:
+        # at least 3 lines: the middle one(s) are complete -> use index search
+        i = find_complete_lines(lines, sublines[1:-1], start+1, end-1)
+        if i < start+1:
+            return -1
+        try:
+            # the middle block occupies lines[i:i+len(sublines)-2]:
+            # sublines[0] must end the line before it and
+            # sublines[-1] must start the line after it
+            if (lines[i-1].endswith(sublines[0]) and
+                lines[i+len(sublines)-2].startswith(sublines[-1])):
+                return i-1
+        except IndexError:
+            pass
+    elif len(sublines) > 1:
+        # last subline must start a line
+        i = find_token(lines, sublines[-1], start, end)
+        if i < start + 1:
+            return -1
+        if lines[i-1].endswith(sublines[0]):
+            return i-1
+ else: # no line-break, may be in the middle of a line
+ if end == 0 or end > len(lines):
+ end = len(lines)
+ for i in range(start, end):
+ if sub in lines[i]:
return i
return -1
-def get_value(lines, token, start, end = 0, default = ""):
+def get_value(lines, token, start=0, end=0, default=""):
""" get_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
Returns "followed by other stuff" with leading and trailing
whitespace removed.
"""
-
i = find_token_exact(lines, token, start, end)
if i == -1:
return default
+ # TODO: establish desired behaviour, eventually change to
+ # return lines.pop(i)[len(token):].strip() # or default
+ # see test_parser_tools.py
l = lines[i].split(None, 1)
if len(l) > 1:
return l[1].strip()
return default
-def get_quoted_value(lines, token, start, end = 0, default = ""):
+def get_quoted_value(lines, token, start=0, end=0, default=""):
""" get_quoted_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
return val.strip('"')
-def get_bool_value(lines, token, start, end = 0, default = None):
- """ get_value(lines, token, start[[, end], default]) -> string
+def get_bool_value(lines, token, start=0, end=0, default=None):
+ """ get_bool_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
token bool_value
return re.sub(rx, '\g<1>' + value + '"', line)
-def del_token(lines, token, start, end = 0):
+def del_token(lines, token, start=0, end=0):
""" del_token(lines, token, start, end) -> int
Find the first line in lines where token is the first element
del lines[k]
return True
+def del_complete_lines(lines, sublines, start=0, end=0):
+ """Delete first occurence of `sublines` in list `lines`.
+
+ Efficient deletion of a sub-list in a list. Works for any values.
+ The `start` and `end` arguments work similar to list.index()
+
+ Returns True if a deletion was done and False if not.
+
+ >>> l = [1, 0, 1, 1, 1, 2]
+ >>> del_complete_lines(l, [0, 1, 1])
+ True
+ >>> l
+ [1, 1, 2]
+ """
+ i = find_complete_lines(lines, sublines, start, end)
+ if i == -1:
+ return False
+ del(lines[i:i+len(sublines)])
+ return True
+
+
+def del_value(lines, token, start=0, end=0, default=None):
+ """
+ Find the next line that looks like:
+ token followed by other stuff
+ Delete that line and return "followed by other stuff"
+ with leading and trailing whitespace removed.
+
+ If token is not found, return `default`.
+ """
+ i = find_token_exact(lines, token, start, end)
+ if i == -1:
+ return default
+ return lines.pop(i)[len(token):].strip()
+
def find_beginning_of(lines, i, start_token, end_token):
count = 1
i = find_tokens_backwards(lines, [start_token, end_token], i-1)
if i == -1:
return -1
- if check_token(lines[i], end_token):
+ if lines[i].startswith(end_token):
count = count+1
else:
count = count-1
i = find_tokens(lines, [end_token, start_token], i+1)
if i == -1:
return -1
- if check_token(lines[i], start_token):
+ if lines[i].startswith(start_token):
count = count+1
else:
count = count-1
return -1
-def find_nonempty_line(lines, start, end = 0):
+def find_nonempty_line(lines, start=0, end=0):
if end == 0:
end = len(lines)
for i in range(start, end):
- if is_nonempty_line(lines[i]):
+ if lines[i].strip():
return i
return -1
def test_find_token(self):
self.assertEqual(find_token(lines, '\\emph', 0), 7)
- self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
- self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
- self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
+ # no line starts with "emph" (without backspace):
self.assertEqual(find_token(lines, 'emph', 0), -1)
+ # token on line[start] is found:
+ self.assertEqual(find_token(lines, '\\emph', 7), 7)
+ self.assertEqual(find_token(lines, '\\emph', 8), 9)
+ # token on line[end] is not found:
+ self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
+ # `ignorews` looks for whitespace-separated tokens:
+ self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
+ self.assertEqual(find_token(lines, '\\emph',0, ignorews=True), 7)
+ self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
+ self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
+ # only first token is found:
+ self.assertEqual(find_token(lines, 'Quotes', 0), -1)
+ self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
def test_find_tokens(self):
self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
+ def test_find_complete_lines(self):
+ sublines = ["\\begin_inset Quotes eld",
+ "\\end_inset"]
+ # return index of first line of sublines:
+ self.assertEqual(find_complete_lines(lines, sublines), 3)
+ self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
+ # return -1 if sublines is not found:
+ self.assertEqual(find_complete_lines(lines, ['x']), -1)
+ # search includes line `start`:
+ self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
+ self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
+ self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
+        # search excludes line `end`:
+ self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
+ # an empty list is always found
+ self.assertEqual(find_complete_lines(lines, []), 0)
+
+
+ def test_find_across_lines(self):
+ # sub with at least 2 line-breaks (uses find_complete_lines):
+ sub = "Quotes eld\n\\end_inset\n\n\n"
+ self.assertEqual(find_across_lines(lines, sub), 3)
+ # Return -1 if not found
+ self.assertEqual(find_across_lines(lines, sub, 4), -1)
+ self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
+ sub = "Quotes eld\n\\end_inset\nx\n"
+ self.assertEqual(find_across_lines(lines, sub), -1)
+ sub = "Quotes X\n\\end_inset\n\n"
+ self.assertEqual(find_across_lines(lines, sub), -1)
+ sub = "Quotes eld\n\\end_insert\n\n"
+ self.assertEqual(find_across_lines(lines, sub), -1)
+ # sub with up to 1 line-break:
+ sub = "Quotes eld\n\\end_inset"
+ self.assertEqual(find_across_lines(lines, sub), 3)
+ self.assertEqual(find_across_lines(lines, sub, 4), -1)
+ self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
+ self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
+ sub = "Quotes X eld\n\\end_inset\n"
+ self.assertEqual(find_across_lines(lines, sub), -1)
+ sub = "Quotes eld\n\\end_insert\n"
+ self.assertEqual(find_across_lines(lines, sub), -1)
+ # sub without line-break
+ sub = "end_"
+ self.assertEqual(find_across_lines(lines, sub), 4)
+ self.assertEqual(find_across_lines(lines, sub, 5), 12)
+ self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
+ self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
+ self.assertEqual(find_across_lines(lines, "XXX"), -1)
+
+
+ def test_get_value(self):
+ self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
+ # TODO: do we want this:
+ self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
+ # or only the part after "token":
+ # self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
+ # return default if not found
+ self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
+ # TODO: do we want this:
+ self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
+        # or empty string if token is found but has no value:
+ # self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
+
+
+ def test_del_complete_lines(self):
+ l = lines[:]
+ sublines = ["\\begin_inset Quotes eld",
+ "\\end_inset"]
+ # normal operation: remove the first occurence of sublines:
+ self.assertEqual(del_complete_lines(l, sublines), True)
+ self.assertEqual(l[3], "")
+ self.assertEqual(len(l), len(lines)-len(sublines))
+ # special cases:
+ l = lines[:]
+ self.assertEqual(del_complete_lines(l, sublines, 21), False)
+ self.assertEqual(l, lines)
+ # deleting empty sublist returns success but does not change the list:
+ self.assertEqual(del_complete_lines(l, [], 21), True)
+ self.assertEqual(l, lines)
+
+ def test_del_value(self):
+ l = lines[:]
+ self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
+ self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
+ # return default if not found
+ self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
+ self.assertEqual(del_value(l, "\\end_inset", default=None), "")
+
+
if __name__ == '__main__':
unittest.main()