From: Günter Milde
Date: Tue, 23 Jan 2018 07:45:19 +0000 (+0100)
Subject: New lyx2lyx tools.
X-Git-Tag: lyx-2.4.0dev-acb2ca7b~3985
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=8da6cdcf23d042cf431ae3cab3b2826a7b9f00c4;p=features.git

New lyx2lyx tools.

New lyx2lyx parser tools find_complete_lines() (replaces find_slice),
del_complete_lines(), and find_across_lines().
Default value 0 for the start argument in utility functions.
Rework the implementation of dash conversion.
---

diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py
index a4c899079e..1c7f3dc37f 100644
--- a/lib/lyx2lyx/lyx_2_2.py
+++ b/lib/lyx2lyx/lyx_2_2.py
@@ -34,9 +34,10 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
 #   insert_to_preamble, latex_length, revert_flex_inset,
 #   revert_font_attrs, hex2ratio, str2bool
 
-from parser_tools import (find_end_of_inset, find_end_of_layout,
-    find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
-    get_containing_layout, get_value, check_token)
+from parser_tools import (del_complete_lines,
+    find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
+    find_token, find_token_backwards, get_containing_layout,
+    get_value, check_token)
 
 ####################################################################
 # Private helper functions
@@ -615,130 +616,105 @@ def convert_dashes(document):
 
     if document.backend != "latex":
         return
 
+    lines = document.body
     i = 0
-    while i < len(document.body):
-        words = document.body[i].split()
+    while i+1 < len(lines):
+        i += 1
+        line = lines[i]
+        words = line.split()
         if (len(words) > 1 and words[0] == "\\begin_inset"
             and (words[1] in ["CommandInset", "ERT", "External", "Formula",
                               "FormulaMacro", "Graphics", "IPA", "listings"]
-                 or ' '.join(words[1:]) == "Flex Code")):
+                 or line.endswith("Flex Code"))):
             # must not replace anything in insets that store LaTeX contents in .lyx files
             # (math and command insets without overridden read() and write() methods
             # filtering out IPA makes Text::readParToken() more simple
             # skip ERT as well since it is not needed there
             # Flex Code is logical markup, typically rendered as typewriter
-            j = find_end_of_inset(document.body, i)
+            j = find_end_of_inset(lines, i)
             if j == -1:
-                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
-                i += 1
+                document.warning("Malformed LyX document: Can't find end of "
+                                 + words[1] + " inset at line " + str(i))
             else:
                 i = j
             continue
-        if document.body[i] == "\\begin_layout LyX-Code":
-            j = find_end_of_layout(document.body, i)
+        if lines[i] == "\\begin_layout LyX-Code":
+            j = find_end_of_layout(lines, i)
             if j == -1:
                 document.warning("Malformed LyX document: "
                     "Can't find end of %s layout at line %d" % (words[1],i))
-                i += 1
             else:
                 i = j
             continue
-
-        if len(words) > 0 and words[0] in ["\\leftindent", "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
-            # skip paragraph parameters (bug 10243)
-            i += 1
+        if line.startswith("\\labelwidthstring"):
+            # skip label width string (bug 10243)
             continue
-        while True:
-            j = document.body[i].find("--")
-            if j == -1:
-                break
-            front = document.body[i][:j]
-            back = document.body[i][j+2:]
-            # We can have an arbitrary number of consecutive hyphens.
-            # These must be split into the corresponding number of two and three hyphens
-            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
-            if back.find("-") == 0:
-                back = back[1:]
-                if len(back) > 0:
-                    document.body.insert(i+1, back)
-                document.body[i] = front + "\\threehyphens"
-            else:
-                if len(back) > 0:
-                    document.body.insert(i+1, back)
-                document.body[i] = front + "\\twohyphens"
-        i += 1
-    i = 0
-    while i < len(document.body):
-        line = document.body[i]
-        while (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
-               document.body[i+1].startswith("-")):
-            line = line.replace(r"\SpecialChar \textcompwordmark{}",
-                                document.body.pop(i+1))
-            document.body[i] = line
-        i += 1
+        if "--" in line:
+            # We can have an arbitrary number of consecutive hyphens.
+            # Replace as LaTeX does: First try emdash, then endash
+            line = line.replace("---", "\\threehyphens\n")
+            line = line.replace("--", "\\twohyphens\n")
+            lines[i:i+1] = line.splitlines()
+
+    # remove ligature breaks between dashes
+    i = 1
+    while i < len(lines):
+        line = lines[i]
+        if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
+            lines[i+1].startswith("-")):
+            lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
+                                    lines.pop(i+1))
+        else:
+            i += 1
 
-# Return number of the next line to check for dashes.
-def _dashes_next_line(document, i):
-    i +=1
-    words = document.body[i].split()
-    # skip paragraph parameters (bug 10243):
-    if words and words[0] in ["\\leftindent", "\\paragraph_spacing",
-                              "\\align", "\\labelwidthstring"]:
-        i += 1
-        words = document.body[i].split()
-    # some insets should be skipped in revert_dashes (cf. convert_dashes)
-    if (len(words) > 1 and words[0] == "\\begin_inset" and
-        words[1] in ["CommandInset", "ERT", "External", "Formula",
-                     "FormulaMacro", "Graphics", "IPA", "listings"]):
-        j = find_end_of_inset(document.body, i)
-        if j == -1:
-            document.warning("Malformed LyX document: Can't find end of "
-                             + words[1] + " inset at line " + str(i))
-            return i
-        return j+1
-    return i
 
 def revert_dashes(document):
     """
     Prevent ligatures of existing --- and --.
-    Convert \\twohyphens and \\threehyphens to -- and ---.
+    Revert \\twohyphens and \\threehyphens to -- and ---.
     Remove preamble code from 2.3->2.2 conversion.
     """
-    # Remove preamble code from 2.3->2.2 conversion:
-    dash_renew_lines = find_slice(document.preamble,
-                                  ['% Added by lyx2lyx',
-                                   r'\renewcommand{\textendash}{--}',
-                                   r'\renewcommand{\textemdash}{---}'])
-    del(document.preamble[dash_renew_lines])
-    # Prevent ligation of hyphens:
+    del_complete_lines(document.preamble,
+                       ['% Added by lyx2lyx',
+                        r'\renewcommand{\textendash}{--}',
+                        r'\renewcommand{\textemdash}{---}'])
+    # Insert ligature breaks to prevent ligation of hyphens to dashes:
+    lines = document.body
     i = 0
-    while i < len(document.body)-1:
-        # increment i, skip some insets (cf. convert_dashes)
-        i = _dashes_next_line(document, i)
-        line = document.body[i]
+    while i+1 < len(lines):
+        i += 1
+        line = lines[i]
+        # skip label width string (bug 10243):
+        if line.startswith("\\labelwidthstring"):
+            continue
+        # do not touch hyphens in some insets (cf. convert_dashes):
+        if line.startswith("\\begin_inset"):
+            try:
+                itype = line.split()[1]
+            except IndexError:
+                continue
+            if itype in ["CommandInset", "ERT", "External",
+                         "Formula", "FormulaMacro", "Graphics",
+                         "IPA", "listings"]:
+                j = find_end_of_inset(lines, i)
+                if j == -1:
+                    document.warning("Malformed LyX document: Can't find "
+                                     "end of %s inset at line %d." % (itype, i))
+                    continue
+                i = j
         if "--" in line:
             line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
             document.body[i:i+1] = line.split('\n')
-    # Convert \twohyphens and \threehyphens:
-    i = 0
-    while i < len(document.body):
-        # skip some insets (see convert_dashes())
-        i = _dashes_next_line(document, i-1)
-        replaced = False
-        if document.body[i].find("\\twohyphens") >= 0:
-            document.body[i] = document.body[i].replace("\\twohyphens", "--")
-            replaced = True
-        if document.body[i].find("\\threehyphens") >= 0:
-            document.body[i] = document.body[i].replace("\\threehyphens", "---")
-            replaced = True
-        if replaced and i+1 < len(document.body) and \
-           (document.body[i+1].find("\\") != 0 or \
-            document.body[i+1].find("\\twohyphens") == 0 or
-            document.body[i+1].find("\\threehyphens") == 0) and \
-           len(document.body[i]) + len(document.body[i+1]) <= 80:
-            document.body[i] = document.body[i] + document.body[i+1]
-            document.body[i+1:i+2] = []
+    # Revert \twohyphens and \threehyphens:
+    i = 1
+    while i < len(lines):
+        line = lines[i]
+        if not line.endswith("hyphens"):
+            i += 1
+        elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
+            line = line.replace("\\twohyphens", "--")
+            line = line.replace("\\threehyphens", "---")
+            lines[i] = line + lines.pop(i+1)
         else:
             i += 1
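
The reworked convert_dashes() no longer splits each line by hand: it marks
every dash run with a replacement token plus an embedded newline and lets
splitlines() re-split the body. Replacing "---" before "--" reproduces
LaTeX's greedy matching (first emdash, then endash). A minimal standalone
sketch of the idea, using a made-up one-line body (illustration only, not
part of the patch):

    body = ["pages 7--9 --- see appendix"]
    line = body[0].replace("---", "\\threehyphens\n")
    line = line.replace("--", "\\twohyphens\n")
    body[0:1] = line.splitlines()
    print(body)
    # ['pages 7\\twohyphens', '9 \\threehyphens', ' see appendix']
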
@@ -879,16 +855,16 @@ def revert_georgian(document):
         document.language = "english"
         i = find_token(document.header, "\\language georgian", 0)
         if i != -1:
-	    document.header[i] = "\\language english"
+            document.header[i] = "\\language english"
         j = find_token(document.header, "\\language_package default", 0)
         if j != -1:
-	    document.header[j] = "\\language_package babel"
+            document.header[j] = "\\language_package babel"
         k = find_token(document.header, "\\options", 0)
         if k != -1:
-	    document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
+            document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
         else:
-	    l = find_token(document.header, "\\use_default_options", 0)
-	    document.header.insert(l + 1, "\\options georgian")
+            l = find_token(document.header, "\\use_default_options", 0)
+            document.header.insert(l + 1, "\\options georgian")
 
 
 def revert_sigplan_doi(document):

diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py
index fc44a11521..625db08604 100644
--- a/lib/lyx2lyx/lyx_2_3.py
+++ b/lib/lyx2lyx/lyx_2_3.py
@@ -24,9 +24,9 @@ import sys, os
 
 # Uncomment only what you need to import, please.
 
-from parser_tools import del_token, find_end_of, find_end_of_layout, \
-    find_end_of_inset, find_re, find_slice, find_token, \
-    find_token_backwards, get_containing_layout, \
+from parser_tools import del_token, del_value, del_complete_lines, \
+    find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
+    find_token, find_token_backwards, get_containing_layout, \
     get_bool_value, get_value, get_quoted_value
 #  find_tokens, find_token_exact, is_in_inset, \
 #  check_token, get_option_value
 
@@ -1303,7 +1303,7 @@ def revert_biblatex(document):
             "Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
             "footcitet", "footcitep", "footcitealt", "footcitealp",\
             "footciteauthor", "footciteyear", "footciteyearpar",\
-	    "citefield", "citetitle", "cite*" ]
+            "citefield", "citetitle", "cite*" ]
 
     i = 0
     while (True):
@@ -1843,19 +1843,16 @@ def revert_chapterbib(document):
 
 def convert_dashligatures(document):
     "Set 'use_dash_ligatures' according to content."
-    use_dash_ligatures = None
-    # Eventually remove preamble code from 2.3->2.2 conversion:
-    dash_renew_lines = find_slice(document.preamble,
-                                  ['% Added by lyx2lyx',
-                                   r'\renewcommand{\textendash}{--}',
-                                   r'\renewcommand{\textemdash}{---}'])
-    del(document.preamble[dash_renew_lines])
-    use_dash_ligatures = bool(dash_renew_lines.stop)
+    # Look for and remove the dash-ligature workaround from the 2.3->2.2
+    # reversion; set use_dash_ligatures to True if found, otherwise to None.
+    use_dash_ligatures = del_complete_lines(document.preamble,
+        ['% Added by lyx2lyx',
+         r'\renewcommand{\textendash}{--}',
+         r'\renewcommand{\textemdash}{---}']) or None
     if use_dash_ligatures is None:
-        # Look for dashes:
-        # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
-        # as interim representation for dash ligatures)
+        # Look for dashes (documents by LyX 2.1 or older have "\twohyphens\n"
+        # or "\threehyphens\n" as interim representation for -- and ---.)
         has_literal_dashes = False
         has_ligature_dashes = False
         j = 0
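
Note that del_complete_lines() doubles as a test-and-remove here: it returns
True when the three workaround lines were found (and deleted) and False
otherwise, so the trailing `or None` leaves use_dash_ligatures unset when the
preamble gives no hint. A small sketch of the idiom on a made-up preamble
(illustration only):

    from parser_tools import del_complete_lines

    workaround = ['% Added by lyx2lyx',
                  r'\renewcommand{\textendash}{--}',
                  r'\renewcommand{\textemdash}{---}']
    preamble = ['% other stuff'] + workaround[:]
    found = del_complete_lines(preamble, workaround) or None
    print(found, preamble)   # True ['% other stuff']
    print(del_complete_lines(preamble, workaround) or None)   # None
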
@@ -1863,16 +1860,19 @@ def convert_dashligatures(document):
             # Skip some document parts where dashes are not converted
             if (i < j) or line.startswith("\\labelwidthstring"):
                 continue
-            words = line.split()
-            if (len(words) > 1 and words[0] == "\\begin_inset"
-                and (words[1] in ["CommandInset", "ERT", "External", "Formula",
-                                  "FormulaMacro", "Graphics", "IPA", "listings"]
-                     or ' '.join(words[1:]) == "Flex Code")):
-                j = find_end_of_inset(document.body, i)
-                if j == -1:
-                    document.warning("Malformed LyX document: "
-                        "Can't find end of %s inset at line %d" % (words[1],i))
-                continue
+            if line.startswith("\\begin_inset"):
+                try:
+                    it = line.split()[1]
+                except IndexError:
+                    continue
+                if (it in ["CommandInset", "ERT", "External", "Formula",
+                           "FormulaMacro", "Graphics", "IPA", "listings"]
+                    or line.endswith("Flex Code")):
+                    j = find_end_of_inset(document.body, i)
+                    if j == -1:
+                        document.warning("Malformed LyX document: Can't "
+                                         "find end of %s inset at line %d." % (it, i))
+                    continue
             if line == "\\begin_layout LyX-Code":
                 j = find_end_of_layout(document.body, i)
                 if j == -1:
@@ -1898,22 +1898,16 @@ def convert_dashligatures(document):
             use_dash_ligatures = True
     # insert the setting if there is a preferred value
     if use_dash_ligatures is not None:
-        i = find_token(document.header, "\\use_microtype", 0)
-        if i != -1:
-            document.header.insert(i+1, "\\use_dash_ligatures %s"
-                                   % str(use_dash_ligatures).lower())
+        i = find_token(document.header, "\\graphics")
+        document.header.insert(i, "\\use_dash_ligatures %s"
+                               % str(use_dash_ligatures).lower())
 
 
 def revert_dashligatures(document):
     """Remove font ligature settings for en- and em-dashes.
     Revert conversion of \twodashes or \threedashes to literal dashes."""
-    i = find_token(document.header, "\\use_dash_ligatures", 0)
-    if i == -1:
-        return
-    use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
-    del document.header[i]
-    if not use_dash_ligatures or document.backend != "latex":
+    use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
+    if use_dash_ligatures != "true" or document.backend != "latex":
         return
-    j = 0
     new_body = []
     for i, line in enumerate(document.body):
@@ -2018,8 +2012,8 @@ def revert_mathindent(document):
     else:
         k = find_token(document.header, "\\options", 0)
         if k != -1:
-	    document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
-	    del document.header[i]
+            document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
+            del document.header[i]
     else:
         l = find_token(document.header, "\\use_default_options", 0)
         document.header.insert(l, "\\options fleqn")
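
revert_dashligatures() above now reads and removes the header setting in one
step: del_value() pops the matching line and returns the bare value, or the
default when the token is absent. A quick sketch against a made-up header
list (illustration only):

    from parser_tools import del_value

    header = ['\\use_microtype false', '\\use_dash_ligatures true']
    print(del_value(header, '\\use_dash_ligatures'))   # true
    print(header)                                      # ['\\use_microtype false']
    print(del_value(header, '\\use_dash_ligatures'))   # None (already removed)
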
diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py
index 44ac5d9045..7818ac5dd7 100644
--- a/lib/lyx2lyx/parser_tools.py
+++ b/lib/lyx2lyx/parser_tools.py
@@ -156,53 +156,6 @@ count_pars_in_inset(lines, i):
 
 import re
 
-# Fast search in lists
-def find_slice(l, sl, start = 0, stop = None):
-    """Return position of first occurence of sequence `sl` in list `l`
-    as a `slice` object.
-
-    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
-    slice(0, 2, None)
-
-    The return value can be used to delete or substitute the sub-list:
-
-    >>> l = [1, 0, 1, 1, 1, 2]
-    >>> s = find_slice(l, [0, 1, 1])
-    >>> del(l[s]); l
-    [1, 1, 2]
-    >>> s = find_slice(l, (1, 2))
-    >>> l[s] = [3]; l
-    [1, 3]
-
-    The start argument works similar to list.index()
-
-    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
-    slice(4, 6, None)
-
-    Use the `stop` attribute of the returned `slice` to test for success:
-
-    >>> s1 = find_slice([2, 3, 1], (3, 1))
-    >>> s2 = find_slice([2, 3, 1], (2, 1))
-    >>> if s1.stop and not s2.stop:
-    ...     print "wow"
-    wow
-    """
-    stop = stop or len(l)
-    N = len(sl) # lenght of sub-list
-    try:
-        while True:
-            for j, value in enumerate(sl):
-                i = l.index(value, start, stop)
-                if j and i != start:
-                    start = i-j
-                    break
-                start = i +1
-            else:
-                return slice(i+1-N, i+1)
-    except ValueError: # sub list `sl` not found
-        return slice(0, 0)
-
-
 # Utilities for one line
 def check_token(line, token):
     """ check_token(line, token) -> bool
@@ -212,7 +165,6 @@ def check_token(line, token):
 
     Deprecated. Use line.startswith(token).
     """
-
     return line.startswith(token)
 
 
@@ -225,40 +177,40 @@ def is_nonempty_line(line):
 
 # Utilities for a list of lines
 
-def find_token(lines, token, start, end = 0, ignorews = False):
+def find_token(lines, token, start=0, end=0, ignorews=False):
     """ find_token(lines, token, start[[, end], ignorews]) -> int
 
     Return the lowest line where token is found, and is the first
     element, in lines[start, end].
 
     If ignorews is True (default is False), then differences in
-    whitespace are ignored, except that there must be no extra
-    whitespace following token itself.
+    whitespace are ignored, but there must be whitespace following
+    token itself.
 
     Return -1 on failure."""
 
     if end == 0 or end > len(lines):
         end = len(lines)
-    m = len(token)
+    if ignorews:
+        y = token.split()
     for i in range(start, end):
         if ignorews:
             x = lines[i].split()
-            y = token.split()
             if len(x) < len(y):
                 continue
             if x[:len(y)] == y:
                 return i
         else:
-            if lines[i][:m] == token:
+            if lines[i].startswith(token):
                 return i
     return -1
 
 
-def find_token_exact(lines, token, start, end = 0):
+def find_token_exact(lines, token, start=0, end=0):
     return find_token(lines, token, start, end, True)
 
 
-def find_tokens(lines, tokens, start, end = 0, ignorews = False):
+def find_tokens(lines, tokens, start=0, end=0, ignorews=False):
     """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int
 
     Return the lowest line where one token in tokens is found, and is
@@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False):
                 if x[:len(y)] == y:
                     return i
             else:
-                if lines[i][:len(token)] == token:
+                if lines[i].startswith(token):
                     return i
     return -1
 
 
-def find_tokens_exact(lines, tokens, start, end = 0):
+def find_tokens_exact(lines, tokens, start=0, end=0):
     return find_tokens(lines, tokens, start, end, True)
 
 
-def find_re(lines, rexp, start, end = 0):
-    """ find_token_re(lines, rexp, start[, end]) -> int
+def find_re(lines, rexp, start=0, end=0):
+    """ find_re(lines, rexp, start[, end]) -> int
 
     Return the lowest line where rexp, a regular expression, is found
     in lines[start, end].
@@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start):
     element, in lines[start, end].
 
     Return -1 on failure."""
-    m = len(token)
     for i in range(start, -1, -1):
-        line = lines[i]
-        if line[:m] == token:
+        if lines[i].startswith(token):
            return i
    return -1
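
With the new defaults the start argument can be omitted, and the two matching
modes differ as the updated docstring describes: the plain form is a pure
str.startswith() prefix match, while ignorews=True compares whole
whitespace-separated tokens. For instance (toy data, illustration only):

    from parser_tools import find_token

    lines = ['\\emphasis on', '\\emph on']
    print(find_token(lines, '\\emph'))                  # 0 (prefix match)
    print(find_token(lines, '\\emph', ignorews=True))   # 1 (whole-token match)
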
Return -1 on failure.""" if end == 0 or end > len(lines): end = len(lines) - m = len(token) + if ignorews: + y = token.split() for i in range(start, end): if ignorews: x = lines[i].split() - y = token.split() if len(x) < len(y): continue if x[:len(y)] == y: return i else: - if lines[i][:m] == token: + if lines[i].startswith(token): return i return -1 -def find_token_exact(lines, token, start, end = 0): +def find_token_exact(lines, token, start=0, end=0): return find_token(lines, token, start, end, True) -def find_tokens(lines, tokens, start, end = 0, ignorews = False): +def find_tokens(lines, tokens, start=0, end=0, ignorews=False): """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int Return the lowest line where one token in tokens is found, and is @@ -278,17 +230,17 @@ def find_tokens(lines, tokens, start, end = 0, ignorews = False): if x[:len(y)] == y: return i else: - if lines[i][:len(token)] == token: + if lines[i].startswith(token): return i return -1 -def find_tokens_exact(lines, tokens, start, end = 0): +def find_tokens_exact(lines, tokens, start=0, end=0): return find_tokens(lines, tokens, start, end, True) -def find_re(lines, rexp, start, end = 0): - """ find_token_re(lines, rexp, start[, end]) -> int +def find_re(lines, rexp, start=0, end=0): + """ find_re(lines, rexp, start[, end]) -> int Return the lowest line where rexp, a regular expression, is found in lines[start, end]. @@ -310,10 +262,8 @@ def find_token_backwards(lines, token, start): element, in lines[start, end]. Return -1 on failure.""" - m = len(token) for i in range(start, -1, -1): - line = lines[i] - if line[:m] == token: + if lines[i].startswith(token): return i return -1 @@ -328,12 +278,86 @@ def find_tokens_backwards(lines, tokens, start): for i in range(start, -1, -1): line = lines[i] for token in tokens: - if line[:len(token)] == token: + if line.startswith(token): + return i + return -1 + + +def find_complete_lines(lines, sublines, start=0, end=0): + """Find first occurence of sequence `sublines` in list `lines`. + Return index of first line or -1 on failure. + + Efficient search for a sub-list in a large list. Works for any values. + + >>> find_complete_lines([1, 2, 3, 1, 1, 2], [1, 2]) + 0 + + The `start` and `end` arguments work similar to list.index() + + >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1) + 4 + >>> find_complete_lines([1, 2, 3, 1, 1 ,2], [1, 2], start=1, end=4) + -1 + + The return value can be used to substitute the sub-list. + Take care to check before use: + + >>> l = [1, 1, 2] + >>> s = find_complete_lines(l, [1, 2]) + >>> if s != -1: + ... l[s:s+2] = [3]; l + [1, 3] + + See also del_complete_lines(). 
+ """ + if not sublines: + return start + end = end or len(lines) + N = len(sublines) + try: + while True: + for j, value in enumerate(sublines): + i = lines.index(value, start, end) + if j and i != start: + start = i-j + break + start = i + 1 + else: + return i +1 - N + except ValueError: # `sublines` not found + return -1 + + +def find_across_lines(lines, sub, start=0, end=0): + sublines = sub.splitlines() + if len(sublines) > 2: + # at least 3 lines: the middle one(s) are complete -> use index search + i = find_complete_lines(lines, sublines[1:-1], start+1, end-1) + if i < start+1: + return -1 + try: + if (lines[i-1].endswith(sublines[0]) and + lines[i+len(sublines)].startswith(sublines[-1])): + return i-1 + except IndexError: + pass + elif len(sublines) > 1: + # last subline must start a line + i = find_token(lines, sublines[-1], start, end) + if i < start + 1: + return -1 + if lines[i-1].endswith(sublines[0]): + return i-1 + else: # no line-break, may be in the middle of a line + if end == 0 or end > len(lines): + end = len(lines) + for i in range(start, end): + if sub in lines[i]: return i return -1 -def get_value(lines, token, start, end = 0, default = ""): +def get_value(lines, token, start=0, end=0, default=""): """ get_value(lines, token, start[[, end], default]) -> string Find the next line that looks like: @@ -341,17 +365,19 @@ def get_value(lines, token, start, end = 0, default = ""): Returns "followed by other stuff" with leading and trailing whitespace removed. """ - i = find_token_exact(lines, token, start, end) if i == -1: return default + # TODO: establish desired behaviour, eventually change to + # return lines.pop(i)[len(token):].strip() # or default + # see test_parser_tools.py l = lines[i].split(None, 1) if len(l) > 1: return l[1].strip() return default -def get_quoted_value(lines, token, start, end = 0, default = ""): +def get_quoted_value(lines, token, start=0, end=0, default=""): """ get_quoted_value(lines, token, start[[, end], default]) -> string Find the next line that looks like: @@ -368,8 +394,8 @@ def get_quoted_value(lines, token, start, end = 0, default = ""): return val.strip('"') -def get_bool_value(lines, token, start, end = 0, default = None): - """ get_value(lines, token, start[[, end], default]) -> string +def get_bool_value(lines, token, start=0, end=0, default=None): + """ get_bool_value(lines, token, start[[, end], default]) -> string Find the next line that looks like: token bool_value @@ -405,7 +431,7 @@ def set_option_value(line, option, value): return re.sub(rx, '\g<1>' + value + '"', line) -def del_token(lines, token, start, end = 0): +def del_token(lines, token, start=0, end=0): """ del_token(lines, token, start, end) -> int Find the first line in lines where token is the first element @@ -418,6 +444,41 @@ def del_token(lines, token, start, end = 0): del lines[k] return True +def del_complete_lines(lines, sublines, start=0, end=0): + """Delete first occurence of `sublines` in list `lines`. + + Efficient deletion of a sub-list in a list. Works for any values. + The `start` and `end` arguments work similar to list.index() + + Returns True if a deletion was done and False if not. 
@@ -425,7 +486,7 @@ def find_beginning_of(lines, i, start_token, end_token):
         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
         if i == -1:
             return -1
-        if check_token(lines[i], end_token):
+        if lines[i].startswith(end_token):
             count = count+1
         else:
             count = count-1
@@ -441,7 +502,7 @@ def find_end_of(lines, i, start_token, end_token):
         i = find_tokens(lines, [end_token, start_token], i+1)
         if i == -1:
             return -1
-        if check_token(lines[i], start_token):
+        if lines[i].startswith(start_token):
             count = count+1
         else:
             count = count-1
@@ -450,11 +511,11 @@ def find_end_of(lines, i, start_token, end_token):
     return -1
 
 
-def find_nonempty_line(lines, start, end = 0):
+def find_nonempty_line(lines, start=0, end=0):
     if end == 0:
         end = len(lines)
     for i in range(start, end):
-        if is_nonempty_line(lines[i]):
+        if lines[i].strip():
             return i
     return -1
 
diff --git a/lib/lyx2lyx/test_parser_tools.py b/lib/lyx2lyx/test_parser_tools.py
index bb3b32feb7..a9d4faee82 100644
--- a/lib/lyx2lyx/test_parser_tools.py
+++ b/lib/lyx2lyx/test_parser_tools.py
@@ -77,10 +77,21 @@ class TestParserTools(unittest.TestCase):
 
     def test_find_token(self):
         self.assertEqual(find_token(lines, '\\emph', 0), 7)
-        self.assertEqual(find_token(lines, '\\emph', 0, 5), -1)
-        self.assertEqual(find_token(lines, '\\emp', 0, 0, True), -1)
-        self.assertEqual(find_token(lines, '\\emp', 0, 0, False), 7)
+        # no line starts with "emph" (without backslash):
         self.assertEqual(find_token(lines, 'emph', 0), -1)
+        # token on line[start] is found:
+        self.assertEqual(find_token(lines, '\\emph', 7), 7)
+        self.assertEqual(find_token(lines, '\\emph', 8), 9)
+        # token on line[end] is not found:
+        self.assertEqual(find_token(lines, '\\emph', 0, 7), -1)
+        # `ignorews` looks for whitespace-separated tokens:
+        self.assertEqual(find_token(lines, '\\emp', 0, ignorews=True), -1)
+        self.assertEqual(find_token(lines, '\\emph', 0, ignorews=True), 7)
+        self.assertEqual(find_token(lines, '\\emph', 7, ignorews=True), 7)
+        self.assertEqual(find_token(lines, '\\emph', 0, 7, True), -1)
+        # only the first token of a line is found:
+        self.assertEqual(find_token(lines, 'Quotes', 0), -1)
+        self.assertEqual(find_token(lines, 'Quotes', 0, ignorews=True), -1)
 
 
     def test_find_tokens(self):
@@ -89,5 +100,94 @@
         self.assertEqual(find_tokens(lines, tokens, 0, 4), -1)
 
 
+    def test_find_complete_lines(self):
+        sublines = ["\\begin_inset Quotes eld",
+                    "\\end_inset"]
+        # return index of first line of sublines:
+        self.assertEqual(find_complete_lines(lines, sublines), 3)
+        self.assertEqual(find_complete_lines(lines, ["\\end_inset"]), 4)
+        # return -1 if sublines is not found:
+        self.assertEqual(find_complete_lines(lines, ['x']), -1)
+        # search includes line `start`:
+        self.assertEqual(find_complete_lines(lines, sublines, 3), 3)
+        self.assertEqual(find_complete_lines(lines, sublines, 4), 20)
+        self.assertEqual(find_complete_lines(lines, sublines, 21), -1)
+        # search excludes line `end`:
+        self.assertEqual(find_complete_lines(lines, sublines, 4, 20), -1)
+        # an empty list is always found:
+        self.assertEqual(find_complete_lines(lines, []), 0)
+
+
+    def test_find_across_lines(self):
+        # sub with at least 2 line-breaks (uses find_complete_lines):
+        sub = "Quotes eld\n\\end_inset\n\n\n"
+        self.assertEqual(find_across_lines(lines, sub), 3)
+        # return -1 if not found:
+        self.assertEqual(find_across_lines(lines, sub, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 0, 6), -1)
+        sub = "Quotes eld\n\\end_inset\nx\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        sub = "Quotes X\n\\end_inset\n\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        sub = "Quotes eld\n\\end_insert\n\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        # sub with up to 1 line-break:
+        sub = "Quotes eld\n\\end_inset"
+        self.assertEqual(find_across_lines(lines, sub), 3)
+        self.assertEqual(find_across_lines(lines, sub, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 4, 3), -1)
+        sub = "Quotes X eld\n\\end_inset\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        sub = "Quotes eld\n\\end_insert\n"
+        self.assertEqual(find_across_lines(lines, sub), -1)
+        # sub without line-break:
+        sub = "end_"
+        self.assertEqual(find_across_lines(lines, sub), 4)
+        self.assertEqual(find_across_lines(lines, sub, 5), 12)
+        self.assertEqual(find_across_lines(lines, sub, 0, 4), -1)
+        self.assertEqual(find_across_lines(lines, sub, 2, 1), -1)
+        self.assertEqual(find_across_lines(lines, "XXX"), -1)
+
+
+    def test_get_value(self):
+        self.assertEqual(get_value(lines, "\\begin_inset"), "Quotes eld")
+        # TODO: do we want this:
+        self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "Quotes eld")
+        # or only the part after "token":
+        # self.assertEqual(get_value(lines, "\\begin_inset Quotes"), "eld")
+        # return default if not found:
+        self.assertEqual(get_value(lines, "\\begin_insert", default=42), 42)
+        # TODO: do we want this:
+        self.assertEqual(get_value(lines, "\\end_inset", default=None), None)
+        # or the empty string if the token is found but has no value:
+        # self.assertEqual(get_value(lines, "\\end_inset", default=None), "")
+
+
+    def test_del_complete_lines(self):
+        l = lines[:]
+        sublines = ["\\begin_inset Quotes eld",
+                    "\\end_inset"]
+        # normal operation: remove the first occurrence of sublines:
+        self.assertEqual(del_complete_lines(l, sublines), True)
+        self.assertEqual(l[3], "")
+        self.assertEqual(len(l), len(lines)-len(sublines))
+        # special cases:
+        l = lines[:]
+        self.assertEqual(del_complete_lines(l, sublines, 21), False)
+        self.assertEqual(l, lines)
+        # deleting an empty sublist returns success but does not change the list:
+        self.assertEqual(del_complete_lines(l, [], 21), True)
+        self.assertEqual(l, lines)
+
+
+    def test_del_value(self):
+        l = lines[:]
+        self.assertEqual(del_value(l, "\\begin_inset"), "Quotes eld")
+        self.assertEqual(del_value(l, "\\begin_inset Quotes"), "erd")
+        # return default if not found:
+        self.assertEqual(del_value(l, "\\begin_insert", default=42), 42)
+        self.assertEqual(del_value(l, "\\end_inset", default=None), "")
+
 
 if __name__ == '__main__':
     unittest.main()
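
The new helpers are exercised by the tests above; since test_parser_tools.py
ends in unittest.main(), the suite can be run directly from the lib/lyx2lyx
directory, e.g. with `python test_parser_tools.py`.
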