From: Günter Milde Date: Fri, 9 Feb 2018 15:49:23 +0000 (+0100) Subject: lyx2lyx fixes and cleanup. X-Git-Tag: lyx-2.4.0dev-acb2ca7b~3899 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=8f86b72cf4b88ae74adc5d94f6cbb60b64b8fb28;p=lyx.git lyx2lyx fixes and cleanup. Don't insert empty line when translating QuoteInsets to literal quotes. Fix regexp pattern in revert_dashligatures/convert_dashligatures. Adjust logic in revert_dashes/convert_dashes and revert_dashligatures/convert_dashligatures. --- diff --git a/autotests/export/lyx2lyx/lyx_2_3_test2.lyx b/autotests/export/lyx2lyx/lyx_2_3_test2.lyx index 1d19f2fb33..2740775a5a 100644 --- a/autotests/export/lyx2lyx/lyx_2_3_test2.lyx +++ b/autotests/export/lyx2lyx/lyx_2_3_test2.lyx @@ -12,7 +12,7 @@ logicalmkup \maintain_unincluded_children false \language bosnian \language_package default -\inputencoding auto +\inputencoding utf8 \fontencoding global \font_roman "cochineal" "DejaVu Serif" \font_sans "lmss" "default" @@ -295,5 +295,53 @@ y=x^{2} \end_layout +\begin_layout Description +Quote +\begin_inset space ~ +\end_inset + +insets: Plain quote insets +\begin_inset Quotes qld +\end_inset + + +\begin_inset Quotes qrd +\end_inset + + vs. + literal quotes "". 
+\end_layout + +\begin_deeper +\begin_layout Verbatim + +Quote insets in Verbatim: +\begin_inset Quotes cld +\end_inset + +foo +\begin_inset Quotes frd +\end_inset + + and +\begin_inset Quotes pls +\end_inset + +bar +\begin_inset Quotes prs +\end_inset + + +\end_layout + +\end_deeper \end_body \end_document diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py index 342bb06119..cb1731304e 100644 --- a/lib/lyx2lyx/lyx_2_2.py +++ b/lib/lyx2lyx/lyx_2_2.py @@ -36,7 +36,7 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert, from parser_tools import (check_token, del_complete_lines, find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re, - find_token, find_token_backwards, get_containing_layout, + find_substring, find_token, find_token_backwards, get_containing_layout, get_containing_inset, get_quoted_value, get_value, is_in_inset, get_bool_value, set_bool_value) @@ -618,19 +618,18 @@ def convert_dashes(document): if document.backend != "latex": return - lines = document.body i = 0 - while i+1 < len(lines): - i += 1 - line = lines[i] - if "--" not in line: - continue + while True: + i = find_substring(document.body, "--", i+1) + if i == -1: + break + line = document.body[i] # skip label width string (bug 10243): if line.startswith("\\labelwidthstring"): continue # Do not touch hyphens in some insets: try: - value, start, end = get_containing_inset(lines, i) + value, start, end = get_containing_inset(document.body, i) except TypeError: # False means no (or malformed) containing inset value, start, end = "no inset", -1, -1 @@ -644,7 +643,7 @@ def convert_dashes(document): i = end continue try: - layout, start, end, j = get_containing_layout(lines, i) + layout, start, end, j = get_containing_layout(document.body, i) except TypeError: # no (or malformed) containing layout document.warning("Malformed LyX document: " "Can't find layout at line %d" % i) @@ -656,18 +655,18 @@ def convert_dashes(document): # Replace as LaTeX does: First try 
emdash, then endash line = line.replace("---", "\\threehyphens\n") line = line.replace("--", "\\twohyphens\n") - lines[i:i+1] = line.splitlines() + document.body[i:i+1] = line.split('\n') # remove ligature breaks between dashes - i = 1 - while i < len(lines): - line = lines[i] - if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and - lines[i+1].startswith("-")): - lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}", - lines.pop(i+1)) - else: - i += 1 + i = 0 + while True: + i = find_substring(document.body, + r"-\SpecialChar \textcompwordmark{}", i+1) + if i == -1: + break + if document.body[i+1].startswith("-"): + document.body[i] = document.body[i].replace( + r"\SpecialChar \textcompwordmark{}", document.body.pop(i+1)) def revert_dashes(document): @@ -682,19 +681,18 @@ def revert_dashes(document): r'\renewcommand{\textemdash}{---}']) # Insert ligature breaks to prevent ligation of hyphens to dashes: - lines = document.body i = 0 - while i+1 < len(lines): - i += 1 - line = lines[i] - if "--" not in line: - continue + while True: + i = find_substring(document.body, "--", i+1) + if i == -1: + break + line = document.body[i] # skip label width string (bug 10243): if line.startswith("\\labelwidthstring"): continue # do not touch hyphens in some insets (cf. 
convert_dashes): try: - value, start, end = get_containing_inset(lines, i) + value, start, end = get_containing_inset(document.body, i) except TypeError: # False means no (or malformed) containing inset value, start, end = "no inset", -1, -1 @@ -708,14 +706,14 @@ def revert_dashes(document): # Revert \twohyphens and \threehyphens: i = 1 - while i < len(lines): - line = lines[i] + while i < len(document.body): + line = document.body[i] if not line.endswith("hyphens"): i +=1 elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"): line = line.replace("\\twohyphens", "--") line = line.replace("\\threehyphens", "---") - lines[i] = line + lines.pop(i+1) + document.body[i] = line + document.body.pop(i+1) else: i += 1 diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py index 7f4ceef6bd..815ac2b508 100644 --- a/lib/lyx2lyx/lyx_2_3.py +++ b/lib/lyx2lyx/lyx_2_3.py @@ -132,7 +132,6 @@ def revert_ibranches(document): continue if inverted: branch = document.body[i][20:].strip() - #document.warning(branch) if not branch in antibranches: antibranch = "Anti-" + branch while antibranch in antibranches: @@ -140,7 +139,6 @@ def revert_ibranches(document): antibranches[branch] = antibranch else: antibranch = antibranches[branch] - #document.warning(antibranch) document.body[i] = "\\begin_inset Branch " + antibranch # now we need to add the new branches to the header @@ -420,6 +418,7 @@ def revert_quotes(document): if len(words) > 1 and words[0] == "\\begin_inset" and \ ( words[1] in ["ERT", "listings"] or ( len(words) > 2 and words[2] in ["URL", "Chunk", "Sweave", "S/R"]) ): j = find_end_of_inset(document.body, i) + if j == -1: document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i)) i += 1 @@ -434,10 +433,10 @@ def revert_quotes(document): document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k)) i = k continue - replace = "\"" + replace = '"' if document.body[k].endswith("s"): 
replace = "'" - document.body[k:l+1] = [replace] + document.body[k:l+2] = [replace] else: i += 1 continue @@ -467,7 +466,7 @@ def revert_quotes(document): replace = "\"" if document.body[k].endswith("s"): replace = "'" - document.body[k:l+1] = [replace] + document.body[k:l+2] = [replace] else: i += 1 continue @@ -498,7 +497,7 @@ def revert_quotes(document): replace = "\"" if document.body[k].endswith("s"): replace = "'" - document.body[k:l+1] = [replace] + document.body[k:l+2] = [replace] i = l @@ -602,7 +601,7 @@ def revert_plainquote(document): replace = "\"" if document.body[k].endswith("s"): replace = "'" - document.body[k:l+1] = [replace] + document.body[k:l+2] = [replace] i = l @@ -1799,13 +1798,13 @@ def convert_dashligatures(document): ['% Added by lyx2lyx', r'\renewcommand{\textendash}{--}', r'\renewcommand{\textemdash}{---}']) or None - + if use_dash_ligatures is None: # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n" # or "\threehyphens\n" as interim representation for -- an ---.) 
lines = document.body has_literal_dashes = has_ligature_dashes = False - dash_pattern = re.compile(u"[\u2013\u2014]|\\twohyphens|\\threehyphens") + dash_pattern = re.compile(u".*[\u2013\u2014]|\\twohyphens|\\threehyphens") i = j = 0 while True: # skip lines without dashes: @@ -1837,13 +1836,13 @@ def convert_dashligatures(document): i = end continue - # literal dash followed by a word or no-break space: - if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", + # literal dash followed by a non-white-character or no-break space: + if re.search(u"[\u2013\u2014]([\S\u00A0\u202F\u2060]|$)", line, flags=re.UNICODE): has_literal_dashes = True - # ligature dash followed by word or no-break space on next line: + # ligature dash followed by non-white-char or no-break space on next line: if (re.search(r"(\\twohyphens|\\threehyphens)", line) and - re.match(u"[\w\u00A0]", lines[i+1], flags=re.UNICODE)): + re.match(u"[\S\u00A0\u202F\u2060]", lines[i+1], flags=re.UNICODE)): has_ligature_dashes = True if has_literal_dashes and has_ligature_dashes: # TODO: insert a warning note in the document? @@ -1866,40 +1865,46 @@ def convert_dashligatures(document): def revert_dashligatures(document): """Remove font ligature settings for en- and em-dashes. - Revert conversion of \twodashes or \threedashes to literal dashes.""" + Revert conversion of \twodashes or \threedashes to literal dashes. 
+ """ use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures") if use_dash_ligatures != "true" or document.backend != "latex": return - j = 0 - new_body = [] - for i, line in enumerate(document.body): - # Skip some document parts where dashes are not converted - if (i < j) or line.startswith("\\labelwidthstring"): - new_body.append(line) + i = 0 + dash_pattern = re.compile(u".*[\u2013\u2014]") + while True: + # skip lines without dashes: + i = find_re(document.body, dash_pattern, i+1) + if i == -1: + break + line = document.body[i] + # skip label width string (see bug 10243): + if line.startswith("\\labelwidthstring"): continue - if (line.startswith("\\begin_inset ") and - line[13:].split()[0] in ["CommandInset", "ERT", "External", - "Formula", "FormulaMacro", "Graphics", "IPA", "listings"] - or line == "\\begin_inset Flex Code"): - j = find_end_of_inset(document.body, i) - if j == -1: - document.warning("Malformed LyX document: Can't find end of " - + words[1] + " inset at line " + str(i)) - new_body.append(line) + # do not touch hyphens in some insets (cf. 
lyx_2_2.convert_dashes): + try: + inset_type, start, end = get_containing_inset(document.body, i) + except TypeError: # no containing inset + inset_type, start, end = "no inset", -1, -1 + if (inset_type.split()[0] in + ["CommandInset", "ERT", "External", "Formula", + "FormulaMacro", "Graphics", "IPA", "listings"] + or inset_type == "Flex Code"): + i = end continue - if line == "\\begin_layout LyX-Code": - j = find_end_of_layout(document.body, i) - if j == -1: - document.warning("Malformed LyX document: " - "Can't find end of %s layout at line %d" % (words[1],i)) - new_body.append(line) + try: + layoutname, start, end, j = get_containing_layout(document.body, i) + except TypeError: # no (or malformed) containing layout + document.warning("Malformed LyX document: " + "Can't find layout at body line %d" % i) + continue + if layoutname == "LyX-Code": + i = end continue # TODO: skip replacement in typewriter fonts line = line.replace(u'\u2013', '\\twohyphens\n') line = line.replace(u'\u2014', '\\threehyphens\n') - lines = line.split('\n') - new_body.extend(line.split('\n')) - document.body = new_body + document.body[i:i+1] = line.split('\n') # redefine the dash LICRs to use ligature dashes: add_to_preamble(document, [r'\renewcommand{\textendash}{--}', r'\renewcommand{\textemdash}{---}'])