X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx_2_0.py;h=2756418f88d09afb0671878977800b6a8f525584;hb=978ecc33092206c9d3ffcead33af5442f3370a5b;hp=8f60458d0ff35af016b5a7b29be3c0f3d5d89c7b;hpb=88f3be373a97b0b18b69ec2540152648d9a4ff60;p=lyx.git diff --git a/lib/lyx2lyx/lyx_2_0.py b/lib/lyx2lyx/lyx_2_0.py index 8f60458d0f..2756418f88 100644 --- a/lib/lyx2lyx/lyx_2_0.py +++ b/lib/lyx2lyx/lyx_2_0.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # This file is part of lyx2lyx # -*- coding: utf-8 -*- -# Copyright (C) 2010 The LyX team +# Copyright (C) 2011 The LyX team # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -15,7 +15,7 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ Convert files to the file format generated by lyx 2.0""" @@ -26,7 +26,7 @@ import sys, os from parser_tools import find_token, find_end_of, find_tokens, \ find_token_exact, find_end_of_inset, find_end_of_layout, \ find_token_backwards, is_in_inset, get_value, get_quoted_value, \ - del_token, check_token + del_token, check_token, get_option_value from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \ put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \ @@ -45,17 +45,6 @@ def remove_option(lines, m, option): return True -# DO NOT USE THIS ROUTINE ANY MORE. Better yet, replace the uses that -# have been made of it with uses of put_cmd_in_ert. -def old_put_cmd_in_ert(string): - for rep in unicode_reps: - string = string.replace(rep[1], rep[0].replace('\\\\', '\\')) - string = string.replace('\\', "\\backslash\n") - string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Plain Layout\n" \ - + string + "\n\\end_layout\n\\end_inset" - return string - - ############################################################################### ### ### Conversion and reversion routines @@ -214,7 +203,7 @@ def revert_xetex(document): # 2.) check font settings # defaults - roman = sans = typew = default + roman = sans = typew = "default" osf = False sf_scale = tt_scale = 100.0 @@ -288,11 +277,11 @@ def revert_xetex(document): sf += 'Scale=' + str(sf_scale / 100.0) + ',' sf += 'Mapping=tex-text]{' + sans + '}' pretext.append(sf) - if typewriter != "default": + if typew != "default": tw = '\\setmonofont' if tt_scale != 100.0: tw += '[Scale=' + str(tt_scale / 100.0) + ']' - tw += '{' + typewriter + '}' + tw += '{' + typew + '}' pretext.append(tw) if osf: pretext.append('\\defaultfontfeatures{Numbers=OldStyle}') @@ -750,7 +739,6 @@ def convert_author_id(document): name = m.group(2) email = m.group(3) document.header[i] = "\\author %i %s %s" % (anum, name, email) - # FIXME Should this really be incremented if we didn't match? anum += 1 i += 1 @@ -772,7 +760,7 @@ def revert_author_id(document): " Remove the author_id from the \\author definition " i = 0 anum = 0 - rx = re.compile(r'(\\author)\s+(\d+)\s+(\".*\")\s*(.*)$') + rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$') idmap = dict() while True: @@ -817,6 +805,28 @@ def revert_suppress_date(document): del document.header[i] +def convert_mhchem(document): + "Set mhchem to off for versions older than 1.6.x" + if document.start < 277: + # LyX 1.5.x and older did never load mhchem. + # Therefore we must switch it off: Documents that use mhchem have + # a manual \usepackage anyway, and documents not using mhchem but + # custom macros with the same names as mhchem commands might get + # corrupted if mhchem is automatically loaded. + mhchem = 0 # off + else: + # LyX 1.6.x did always load mhchem automatically. + mhchem = 1 # auto + i = find_token(document.header, "\\use_esint", 0) + if i == -1: + # pre-1.5.x document + i = find_token(document.header, "\\use_amsmath", 0) + if i == -1: + document.warning("Malformed LyX document: Could not find amsmath os esint setting.") + return + document.header.insert(i + 1, "\\use_mhchem %d" % mhchem) + + def revert_mhchem(document): "Revert mhchem loading to preamble code" @@ -940,64 +950,130 @@ def revert_includeall(document): def revert_multirow(document): " Revert multirow cells in tables to TeX-code" - i = 0 - multirow = False + + # first, let's find out if we need to do anything + # cell type 3 is multirow begin cell + i = find_token(document.body, '", i) - if cend == -1: - document.warning("Malformed LyX document: Could not find end of tabular cell.") - i += 1 - continue - blay = find_token(document.body, "\\begin_layout", i, cend) - if blay == -1: - document.warning("Can't find layout for cell!") - i = j - continue - bend = find_end_of_layout(document.body, blay) - if blay == -1: - document.warning("Can't find end of layout for cell!") - i = cend + # find begin/end of table + begin_table = find_token(document.body, '') + if end_table == -1: + document.warning("Malformed LyX document: Could not find end of table.") + begin_table += 1 + continue + # does this table have multirow? + i = find_token(document.body, '', begin_row, end_table) + if begin_row == -1: + document.warning("Can't find row " + str(row + 1)) break - # remove the multirow tag, set the valignment to top - # and remove the top line - # FIXME Are we sure these always have space around them? - document.body[k] = document.body[k].replace(' multirow="4" ', ' ') - document.body[k] = document.body[k].replace('valignment="middle"', 'valignment="top"') - document.body[k] = document.body[k].replace(' topline="true" ', ' ') - k += 1 - # this will always be ok - i = cend - - if multirow == True: - add_to_preamble(document, ["\\usepackage{multirow}"]) + end_row = find_end_of(document.body, begin_row, '', '') + if end_row == -1: + document.warning("Can't find end of row " + str(row + 1)) + break + begin_cell = begin_row + multirows.append([]) + for column in range(numcols): + begin_cell = find_token(document.body, '" in a cell, but + # that seems fairly unlikely. + end_cell = find_end_of(document.body, begin_cell, '') + if end_cell == -1: + document.warning("Can't find end of column " + str(column + 1) + \ + "in row " + str(row + 1)) + break + multirows[row].append([begin_cell, end_cell, 0]) + if document.body[begin_cell].find('multirow="3"') != -1: + multirows[row][column][2] = 3 # begin multirow + mrstarts.append([row, column]) + elif document.body[begin_cell].find('multirow="4"') != -1: + multirows[row][column][2] = 4 # in multirow + begin_cell = end_cell + begin_row = end_row + # end of table info collection + + # work from the back to avoid messing up numbering + mrstarts.reverse() + for m in mrstarts: + row = m[0] + col = m[1] + # get column width + col_width = get_option_value(document.body[begin_table + 2 + col], "width") + # "0pt" means that no width is specified + if not col_width or col_width == "0pt": + col_width = "*" + # determine the number of cells that are part of the multirow + nummrs = 1 + for r in range(row + 1, numrows): + if multirows[r][col][2] != 4: + break + nummrs += 1 + # take the opportunity to revert this line + lineno = multirows[r][col][0] + document.body[lineno] = document.body[lineno].\ + replace(' multirow="4" ', ' ').\ + replace('valignment="middle"', 'valignment="top"').\ + replace(' topline="true" ', ' ') + # remove bottom line of previous multirow-part cell + lineno = multirows[r-1][col][0] + document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ') + # revert beginning cell + bcell = multirows[row][col][0] + ecell = multirows[row][col][1] + document.body[bcell] = document.body[bcell].\ + replace(' multirow="3" ', ' ').\ + replace('valignment="middle"', 'valignment="top"') + blay = find_token(document.body, "\\begin_layout", bcell, ecell) + if blay == -1: + document.warning("Can't find layout for cell!") + continue + bend = find_end_of_layout(document.body, blay) + if bend == -1: + document.warning("Can't find end of layout for cell!") + continue + # do the later one first, so as not to mess up the numbering + # we are wrapping the whole cell in this ert + # so before the end of the layout... + document.body[bend:bend] = put_cmd_in_ert("}") + # ...and after the beginning + document.body[blay + 1:blay + 1] = \ + put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{") + + begin_table = end_table def convert_math_output(document): @@ -1483,7 +1559,7 @@ def convert_use_makebox(document): i = z continue document.body.insert(k + 1, "use_makebox 0") - i = z + 1 + i = blay + 1 # not z + 1 (box insets may be nested) def revert_IEEEtran(document): @@ -1739,9 +1815,18 @@ def revert_flexnames(document): def convert_mathdots(document): " Load mathdots automatically " - i = find_token(document.header, "\\use_esint" , 0) - if i != -1: - document.header.insert(i + 1, "\\use_mathdots 1") + i = find_token(document.header, "\\use_mhchem" , 0) + if i == -1: + i = find_token(document.header, "\\use_esint" , 0) + if i == -1: + document.warning("Malformed LyX document: Can't find \\use_mhchem.") + return; + j = find_token(document.preamble, "\\usepackage{mathdots}", 0) + if j == -1: + document.header.insert(i + 1, "\\use_mathdots 0") + else: + document.header.insert(i + 1, "\\use_mathdots 2") + del document.preamble[j] def revert_mathdots(document): @@ -1749,7 +1834,7 @@ def revert_mathdots(document): mathdots = find_token(document.header, "\\use_mathdots" , 0) if mathdots == -1: - document.warning("No \\usemathdots line. Assuming auto.") + document.warning("No \\use_mathdots line. Assuming auto.") else: val = get_value(document.header, "\\use_mathdots", mathdots) del document.header[mathdots] @@ -1765,7 +1850,7 @@ def revert_mathdots(document): return if usedots == 2: # force load case - add_to_preamble(["\\usepackage{mathdots}"]) + add_to_preamble(document, ["\\usepackage{mathdots}"]) return # so we are in the auto case. we want to load mathdots if \iddots is used. @@ -1888,10 +1973,17 @@ def revert_diagram(document): # only need to do it once! return +chapters = ("amsbook", "book", "docbook-book", "elsart", "extbook", "extreport", + "jbook", "jreport", "jsbook", "literate-book", "literate-report", "memoir", + "mwbk", "mwrep", "recipebook", "report", "scrbook", "scrreprt", "svmono", + "svmult", "tbook", "treport", "tufte-book") def convert_bibtex_clearpage(document): " insert a clear(double)page bibliographystyle if bibtotoc option is used " + if document.textclass not in chapters: + return + i = find_token(document.header, '\\papersides', 0) sides = 0 if i == -1: @@ -1952,6 +2044,415 @@ def convert_bibtex_clearpage(document): j = k + len(subst) +def check_passthru(document): + tc = document.textclass + ok = (tc == "literate-article" or tc == "literate-book" or tc == "literate-report") + if not ok: + mods = document.get_module_list() + for mod in mods: + if mod == "sweave" or mod == "noweb": + ok = True + break + return ok + + +def convert_passthru(document): + " http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html " + if not check_passthru: + return + + rx = re.compile("\\\\begin_layout \s*(\w+)") + beg = 0 + for lay in ["Chunk", "Scrap"]: + while True: + beg = find_token(document.body, "\\begin_layout " + lay, beg) + if beg == -1: + break + end = find_end_of_layout(document.body, beg) + if end == -1: + document.warning("Can't find end of layout at line " + str(beg)) + beg += 1 + continue + + # we are now going to replace newline insets within this layout + # by new instances of this layout. so we have repeated layouts + # instead of newlines. + + # if the paragraph has any customization, however, we do not want to + # do the replacement. + if document.body[beg + 1].startswith("\\"): + beg = end + 1 + continue + + ns = beg + while True: + ns = find_token(document.body, "\\begin_inset Newline newline", ns, end) + if ns == -1: + break + ne = find_end_of_inset(document.body, ns) + if ne == -1 or ne > end: + document.warning("Can't find end of inset at line " + str(nb)) + ns += 1 + continue + if document.body[ne + 1] == "": + ne += 1 + subst = ["\\end_layout", "", "\\begin_layout " + lay] + document.body[ns:ne + 1] = subst + # now we need to adjust end, in particular, but might as well + # do ns properly, too + newlines = (ne - ns) - len(subst) + ns += newlines + 2 + end += newlines + 2 + + # ok, we now want to find out if the next layout is the + # same as this one. if so, we will insert an extra copy of it + didit = False + next = find_token(document.body, "\\begin_layout", end) + if next != -1: + m = rx.match(document.body[next]) + if m: + nextlay = m.group(1) + if nextlay == lay: + subst = ["\\begin_layout " + lay, "", "\\end_layout", ""] + document.body[next:next] = subst + didit = True + beg = end + 1 + if didit: + beg += 4 # for the extra layout + + +def revert_passthru(document): + " http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html " + if not check_passthru: + return + rx = re.compile("\\\\begin_layout \s*(\w+)") + beg = 0 + for lay in ["Chunk", "Scrap"]: + while True: + beg = find_token(document.body, "\\begin_layout " + lay, beg) + if beg == -1: + break + end = find_end_of_layout(document.body, beg) + if end == -1: + document.warning("Can't find end of layout at line " + str(beg)) + beg += 1 + continue + + # we now want to find out if the next layout is the + # same as this one. but we will need to do this over and + # over again. + while True: + next = find_token(document.body, "\\begin_layout", end) + if next == -1: + break + m = rx.match(document.body[next]) + if not m: + break + nextlay = m.group(1) + if nextlay != lay: + break + # so it is the same layout again. we now want to know if it is empty. + # but first let's check and make sure there is no content between the + # two layouts. i'm not sure if that can happen or not. + for l in range(end + 1, next): + document.warning("c'" + document.body[l] + "'") + if document.body[l] != "": + document.warning("Found content between adjacent " + lay + " layouts!") + break + nextend = find_end_of_layout(document.body, next) + if nextend == -1: + document.warning("Can't find end of layout at line " + str(next)) + break + empty = True + for l in range(next + 1, nextend): + document.warning("e'" + document.body[l] + "'") + if document.body[l] != "": + empty = False + break + if empty: + # empty layouts just get removed + # should we check if it's before yet another such layout? + del document.body[next : nextend + 1] + # and we do not want to check again. we know the next layout + # should be another Chunk and should be left as is. + break + else: + # if it's not empty, then we want to insert a newline in place + # of the layout switch + subst = ["\\begin_inset Newline newline", "\\end_inset", ""] + document.body[end : next + 1] = subst + # and now we have to find the end of the new, larger layout + newend = find_end_of_layout(document.body, beg) + if newend == -1: + document.warning("Can't find end of new layout at line " + str(beg)) + break + end = newend + beg = end + 1 + + +def revert_multirowOffset(document): + " Revert multirow cells with offset in tables to TeX-code" + # this routine is the same as the revert_multirow routine except that + # it checks additionally for the offset + + # first, let's find out if we need to do anything + i = find_token(document.body, '') + if end_table == -1: + document.warning("Malformed LyX document: Could not find end of table.") + begin_table += 1 + continue + # does this table have multirow? + i = find_token(document.body, '', begin_row, end_table) + if begin_row == -1: + document.warning("Can't find row " + str(row + 1)) + break + end_row = find_end_of(document.body, begin_row, '', '') + if end_row == -1: + document.warning("Can't find end of row " + str(row + 1)) + break + begin_cell = begin_row + multirows.append([]) + for column in range(numcols): + begin_cell = find_token(document.body, '" in a cell, but + # that seems fairly unlikely. + end_cell = find_end_of(document.body, begin_cell, '') + if end_cell == -1: + document.warning("Can't find end of column " + str(column + 1) + \ + "in row " + str(row + 1)) + break + multirows[row].append([begin_cell, end_cell, 0]) + if document.body[begin_cell].find('multirow="3" mroffset=') != -1: + multirows[row][column][2] = 3 # begin multirow + mrstarts.append([row, column]) + elif document.body[begin_cell].find('multirow="4"') != -1: + multirows[row][column][2] = 4 # in multirow + begin_cell = end_cell + begin_row = end_row + # end of table info collection + + # work from the back to avoid messing up numbering + mrstarts.reverse() + for m in mrstarts: + row = m[0] + col = m[1] + # get column width + col_width = get_option_value(document.body[begin_table + 2 + col], "width") + # "0pt" means that no width is specified + if not col_width or col_width == "0pt": + col_width = "*" + # determine the number of cells that are part of the multirow + nummrs = 1 + for r in range(row + 1, numrows): + if multirows[r][col][2] != 4: + break + nummrs += 1 + # take the opportunity to revert this line + lineno = multirows[r][col][0] + document.body[lineno] = document.body[lineno].\ + replace(' multirow="4" ', ' ').\ + replace('valignment="middle"', 'valignment="top"').\ + replace(' topline="true" ', ' ') + # remove bottom line of previous multirow-part cell + lineno = multirows[r-1][col][0] + document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ') + # revert beginning cell + bcell = multirows[row][col][0] + ecell = multirows[row][col][1] + offset = get_option_value(document.body[bcell], "mroffset") + document.body[bcell] = document.body[bcell].\ + replace(' multirow="3" ', ' ').\ + replace('valignment="middle"', 'valignment="top"') + # remove mroffset option + document.body[bcell] = rgx.sub('', document.body[bcell]) + + blay = find_token(document.body, "\\begin_layout", bcell, ecell) + if blay == -1: + document.warning("Can't find layout for cell!") + continue + bend = find_end_of_layout(document.body, blay) + if bend == -1: + document.warning("Can't find end of layout for cell!") + continue + # do the later one first, so as not to mess up the numbering + # we are wrapping the whole cell in this ert + # so before the end of the layout... + document.body[bend:bend] = put_cmd_in_ert("}") + # ...and after the beginning + document.body[blay + 1:blay + 1] = \ + put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}[" \ + + offset + "]{") + + # on to the next table + begin_table = end_table + + +def revert_script(document): + " Convert subscript/superscript inset to TeX code " + i = 0 + foundsubscript = False + while 1: + i = find_token(document.body, '\\begin_inset script', i) + if i == -1: + break + z = find_end_of_inset(document.body, i) + if z == -1: + document.warning("Malformed LyX document: Can't find end of script inset.") + i += 1 + continue + blay = find_token(document.body, "\\begin_layout", i, z) + if blay == -1: + document.warning("Malformed LyX document: Can't find layout in script inset.") + i = z + continue + + if check_token(document.body[i], "\\begin_inset script subscript"): + subst = '\\textsubscript{' + foundsubscript = True + elif check_token(document.body[i], "\\begin_inset script superscript"): + subst = '\\textsuperscript{' + else: + document.warning("Malformed LyX document: Unknown type of script inset.") + i = z + continue + bend = find_end_of_layout(document.body, blay) + if bend == -1 or bend > z: + document.warning("Malformed LyX document: Can't find end of layout in script inset.") + i = z + continue + # remove the \end_layout \end_inset pair + document.body[bend:z + 1] = put_cmd_in_ert("}") + document.body[i:blay + 1] = put_cmd_in_ert(subst) + i += 1 + # these classes provide a \textsubscript command: + # FIXME: Would be nice if we could use the information of the .layout file here + classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"] + if foundsubscript and find_token_exact(classes, document.textclass, 0) == -1: + add_to_preamble(document, ['\\usepackage{subscript}']) + + +def convert_use_xetex(document): + " convert \\use_xetex to \\use_non_tex_fonts " + i = 0 + i = find_token(document.header, "\\use_xetex", 0) + if i == -1: + return + + val = get_value(document.header, "\\use_xetex", 0) + document.header[i] = "\\use_non_tex_fonts " + val + + +def revert_use_xetex(document): + " revert \\use_non_tex_fonts to \\use_xetex " + i = 0 + i = find_token(document.header, "\\use_non_tex_fonts", 0) + if i == -1: + document.warning("Malformed document. No \\use_non_tex_fonts param!") + return + + val = get_value(document.header, "\\use_non_tex_fonts", 0) + document.header[i] = "\\use_xetex " + val + + +def revert_labeling(document): + koma = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr", + "scrlttr2", "scrreprt") + if document.textclass in koma: + return + i = 0 + while True: + i = find_token_exact(document.body, "\\begin_layout Labeling", i) + if i == -1: + return + document.body[i] = "\\begin_layout List" + + +def revert_langpack(document): + " revert \\language_package parameter " + i = 0 + i = find_token(document.header, "\\language_package", 0) + if i == -1: + document.warning("Malformed document. No \\language_package param!") + return + + del document.header[i] + + +def convert_langpack(document): + " Add \\language_package parameter " + i = find_token(document.header, "\language" , 0) + if i == -1: + document.warning("Malformed document. No \\language defined!") + return + + document.header.insert(i + 1, "\\language_package default") + + +def revert_tabularwidth(document): + i = 0 + while True: + i = find_token(document.body, "\\begin_inset Tabular", i) + if i == -1: + return + j = find_end_of_inset(document.body, i) + if j == -1: + document.warning("Unable to find end of Tabular inset at line " + str(i)) + i += 1 + continue + i += 1 + features = find_token(document.body, "