X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx2lyx_tools.py;h=75a5c820def0b75b958f5b13505e2780ff08fcec;hb=abf38e1820a6e491fe19211538436b0680866879;hp=ac0b0a6c9ecf6fde0d7941db4990964e1298adee;hpb=aef6b1681125515418922ce05b269d7d9c4aa693;p=lyx.git diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py index ac0b0a6c9e..75a5c820de 100644 --- a/lib/lyx2lyx/lyx2lyx_tools.py +++ b/lib/lyx2lyx/lyx2lyx_tools.py @@ -17,8 +17,8 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ''' -This modules offer several free functions to help with lyx2lyx'ing. -More documentaton is below, but here is a quick guide to what +This module offers several free functions to help with lyx2lyx'ing. +More documentaton is below, but here is a quick guide to what they do. Optional arguments are marked by brackets. add_to_preamble(document, text): @@ -37,8 +37,8 @@ insert_to_preamble(document, text[, index]): default index is 0, so the material is inserted at the beginning. Prepends a comment "% Added by lyx2lyx" to text. -put_cmd_in_ert(arg): - Here arg should be a list of strings (lines), which we want to +put_cmd_in_ert(cmd): + Here cmd should be a list of strings (lines), which we want to wrap in ERT. Returns a list of strings so wrapped. A call to this routine will often go something like this: i = find_token('\\begin_inset FunkyInset', ...) @@ -47,23 +47,53 @@ put_cmd_in_ert(arg): ert = put_cmd_in_ert(content) document.body[i:j+1] = ert +get_ert(lines, i[, verbatim]): + Here, lines is a list of lines of LyX material containing an ERT inset, + whose content we want to convert to LaTeX. The ERT starts at index i. + If the optional (by default: False) bool verbatim is True, the content + of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax) + for the use in verbatim insets. + lyx2latex(document, lines): - Here, lines is a list of lines of LyX material we want to convert + Here, lines is a list of lines of LyX material we want to convert to LaTeX. We do the best we can and return a string containing the translated material. +lyx2verbatim(document, lines): + Here, lines is a list of lines of LyX material we want to convert + to verbatim material (used in ERT an the like). We do the best we + can and return a string containing the translated material. + latex_length(slen): - Convert lengths (in LyX form) to their LaTeX representation. Returns - (bool, length), where the bool tells us if it was a percentage, and - the length is the LaTeX representation. + Convert lengths (in LyX form) to their LaTeX representation. Returns + (bool, length), where the bool tells us if it was a percentage, and + the length is the LaTeX representation. + +convert_info_insets(document, type, func): + Applies func to the argument of all info insets matching certain types + type : the type to match. This can be a regular expression. + func : function from string to string to apply to the "arg" field of + the info insets. + +is_document_option(document, option): + Find if _option_ is a document option (\\options in the header). +insert_document_option(document, option): + Insert _option_ as a document option. + +remove_document_option(document, option): + Remove _option_ as a document option. + +revert_language(document, lyxname, babelname, polyglossianame): + Reverts native language support to ERT + If babelname or polyglossianame is empty, it is assumed + this language package is not supported for the given language. ''' -import string -from parser_tools import find_token +import re +from parser_tools import find_token, find_end_of_inset, get_containing_layout, get_value, get_bool_value from unicode_symbols import unicode_reps - # This will accept either a list of lines or a single line. # It is bad practice to pass something with embedded newlines, # though we will handle that. @@ -100,38 +130,75 @@ def add_to_preamble(document, text): # It should really be a list. def insert_to_preamble(document, text, index = 0): """ Insert text to the preamble at a given line""" - + if not type(text) is list: # split on \n just in case # it'll give us the one element list we want # if there's no \n, too text = text.split('\n') - + text.insert(0, "% Added by lyx2lyx") document.preamble[index:index] = text -def put_cmd_in_ert(arg): - ''' - arg should be a list of lines we want to wrap in ERT. - Returns a list of strings, with the lines so wrapped. - ''' - - ret = ["\\begin_inset ERT", "status collapsed", "\\begin_layout Plain Layout", ""] - # It will be faster for us to work with a single string internally. - # That way, we only go through the unicode_reps loop once. - if type(arg) is list: - s = "\n".join(arg) +# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method +# Created from the reversed list to keep the first of alternative definitions. +licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1]) + +def put_cmd_in_ert(cmd): + """ + Return ERT inset wrapping `cmd` as a list of strings. + + `cmd` can be a string or list of lines. Non-ASCII characters are converted + to the respective LICR macros if defined in unicodesymbols. + """ + ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""] + # It will be faster to work with a single string internally. + if isinstance(cmd, list): + cmd = u"\n".join(cmd) else: - s = arg - for rep in unicode_reps: - s = s.replace(rep[1], rep[0].replace('\\\\', '\\')) - s = s.replace('\\', "\\backslash\n") - ret += s.splitlines() - ret += ["\\end_layout", "\\end_inset"] + cmd = u"%s" % cmd # ensure it is an unicode instance + cmd = cmd.translate(licr_table) + cmd = cmd.replace("\\", "\n\\backslash\n") + ret += cmd.splitlines() + ret += ["\\end_layout", "", "\\end_inset"] + return ret + + +def get_ert(lines, i, verbatim = False): + 'Convert an ERT inset into LaTeX.' + if not lines[i].startswith("\\begin_inset ERT"): + return "" + j = find_end_of_inset(lines, i) + if j == -1: + return "" + while i < j and not lines[i].startswith("status"): + i = i + 1 + i = i + 1 + ret = "" + first = True + while i < j: + if lines[i] == "\\begin_layout Plain Layout": + if first: + first = False + else: + ret = ret + "\n" + while i + 1 < j and lines[i+1] == "": + i = i + 1 + elif lines[i] == "\\end_layout": + while i + 1 < j and lines[i+1] == "": + i = i + 1 + elif lines[i] == "\\backslash": + if verbatim: + ret = ret + "\n" + lines[i] + "\n" + else: + ret = ret + "\\" + else: + ret = ret + lines[i] + i = i + 1 return ret - + def lyx2latex(document, lines): 'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.' @@ -173,6 +240,10 @@ def lyx2latex(document, lines): line = "''" else: line = "'" + elif line.startswith("\\begin_inset Newline newline"): + line = "\\\\ " + elif line.startswith("\\noindent"): + line = "\\noindent " # we need the space behind the command elif line.startswith("\\begin_inset space"): line = line[18:].strip() if line.startswith("\\hspace"): @@ -219,7 +290,7 @@ def lyx2latex(document, lines): # Do the LyX text --> LaTeX conversion for rep in unicode_reps: - line = line.replace(rep[1], rep[0] + "{}") + line = line.replace(rep[1], rep[0]) line = line.replace(r'\backslash', r'\textbackslash{}') line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}') line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}') @@ -234,8 +305,17 @@ def lyx2latex(document, lines): return content +def lyx2verbatim(document, lines): + 'Convert some LyX stuff into corresponding verbatim stuff, as best we can.' + + content = lyx2latex(document, lines) + content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content) + + return content + + def latex_length(slen): - ''' + ''' Convert lengths to their LaTeX representation. Returns (bool, length), where the bool tells us if it was a percentage, and the length is the LaTeX representation. @@ -249,10 +329,15 @@ def latex_length(slen): # the + always precedes the - # Convert relative lengths to LaTeX units - units = {"text%":"\\textwidth", "col%":"\\columnwidth", - "page%":"\\paperwidth", "line%":"\\linewidth", - "theight%":"\\textheight", "pheight%":"\\paperheight"} - for unit in units.keys(): + units = {"col%": "\\columnwidth", + "text%": "\\textwidth", + "page%": "\\paperwidth", + "line%": "\\linewidth", + "theight%": "\\textheight", + "pheight%": "\\paperheight", + "baselineskip%": "\\baselineskip" + } + for unit in list(units.keys()): i = slen.find(unit) if i == -1: continue @@ -288,6 +373,44 @@ def latex_length(slen): return (percent, slen) +def length_in_bp(length): + " Convert a length in LyX format to its value in bp units " + + em_width = 10.0 / 72.27 # assume 10pt font size + text_width = 8.27 / 1.7 # assume A4 with default margins + # scale factors are taken from Length::inInch() + scales = {"bp" : 1.0, + "cc" : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))), + "cm" : (72.0 / 2.54), + "dd" : (72.0 / (72.27 / (0.376 * 2.845))), + "em" : (72.0 * em_width), + "ex" : (72.0 * em_width * 0.4305), + "in" : 72.0, + "mm" : (72.0 / 25.4), + "mu" : (72.0 * em_width / 18.0), + "pc" : (72.0 / (72.27 / 12.0)), + "pt" : (72.0 / (72.27)), + "sp" : (72.0 / (72.27 * 65536.0)), + "text%" : (72.0 * text_width / 100.0), + "col%" : (72.0 * text_width / 100.0), # assume 1 column + "page%" : (72.0 * text_width * 1.7 / 100.0), + "line%" : (72.0 * text_width / 100.0), + "theight%" : (72.0 * text_width * 1.787 / 100.0), + "pheight%" : (72.0 * text_width * 2.2 / 100.0)} + + rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$') + m = rx.match(length) + if not m: + document.warning("Invalid length value: " + length + ".") + return 0 + value = m.group(1) + unit = m.group(2) + if not unit in scales.keys(): + document.warning("Unknown length unit: " + unit + ".") + return value + return "%g" % (float(value) * scales[unit]) + + def revert_flex_inset(lines, name, LaTeXname): " Convert flex insets to TeX code " i = 0 @@ -347,7 +470,7 @@ def revert_font_attrs(lines, name, LaTeXname): while True: i = find_token(lines, name + ' on', i) if i == -1: - return changed + break j = find_token(lines, name + ' default', i) k = find_token(lines, name + ' on', i + 1) # if there is no default set, the style ends with the layout @@ -361,6 +484,16 @@ def revert_font_attrs(lines, name, LaTeXname): changed = True i += 1 + # now delete all remaining lines that manipulate this attribute + i = 0 + while True: + i = find_token(lines, name, i) + if i == -1: + break + del lines[i] + + return changed + def revert_layout_command(lines, name, LaTeXname): " Reverts a command from a layout to TeX code " @@ -405,3 +538,238 @@ def str2bool(s): "'true' goes to True, case-insensitively, and we strip whitespace." s = s.strip().lower() return s == "true" + + +def convert_info_insets(document, type, func): + "Convert info insets matching type using func." + i = 0 + type_re = re.compile(r'^type\s+"(%s)"$' % type) + arg_re = re.compile(r'^arg\s+"(.*)"$') + while True: + i = find_token(document.body, "\\begin_inset Info", i) + if i == -1: + return + t = type_re.match(document.body[i + 1]) + if t: + arg = arg_re.match(document.body[i + 2]) + if arg: + new_arg = func(arg.group(1)) + document.body[i + 2] = 'arg "%s"' % new_arg + i += 3 + + +def insert_document_option(document, option): + "Insert _option_ as a document option." + + # Find \options in the header + options_line = find_token(document.header, "\\options", 0) + + # if the options does not exists add it after the textclass + if options_line == -1: + textclass_line = find_token(document.header, "\\textclass", 0) + document.header.insert(textclass_line +1, + r"\options %s" % option) + return + + # add it to the end of the options + document.header[options_line] += ",%s" % option + + +def remove_document_option(document, option): + """ Remove _option_ as a document option. + + It is assumed that option belongs to the \options. + That can be done running is_document_option(document, option).""" + + options_line = find_token(document.header, "\\options", 0) + option_pos = document.header[options_line].find(option) + + # Remove option from \options + comma_before_pos = document.header[options_line].rfind(',', 0, option_pos) + comma_after_pos = document.header[options_line].find(',', option_pos) + + # if there are no commas then it is the single option + # and the options line should be removed since it will be empty + if comma_before_pos == comma_after_pos == -1: + del document.header[options_line] + return + + # last option + options = document.header[options_line] + if comma_after_pos == -1: + document.header[options_line] = options[:comma_before_pos].rsplit() + return + + document.header[options_line] = options[comma_before_pos: comma_after_pos] + + +def is_document_option(document, option): + "Find if _option_ is a document option" + + # Find \options in the header + options_line = find_token(document.header, "\\options", 0) + + # \options is not present in the header + if options_line == -1: + return False + + option_pos = document.header[options_line].find(option) + # option is not present in the \options + if option_pos == -1: + return False + + return True + + +def revert_language(document, lyxname, babelname, polyglossianame): + " Revert native language support " + + # Are we using polyglossia? + use_polyglossia = False + if get_bool_value(document.header, "\\use_non_tex_fonts"): + i = find_token(document.header, "\\language_package") + if i == -1: + document.warning("Malformed document! Missing \\language_package") + else: + pack = get_value(document.header, "\\language_package", i) + if pack == "default" or pack == "auto": + use_polyglossia = True + + # Do we use this language with polyglossia? + with_polyglossia = use_polyglossia and polyglossianame != "" + # Do we use this language with babel? + with_babel = with_polyglossia == False and babelname != "" + + # Are we dealing with a primary or secondary language? + primary = False + secondary = False + + orig_doc_language = document.language + # Main language first + if document.language == lyxname: + primary = True + document.language = "english" + i = find_token(document.header, "\\language %s" % lyxname, 0) + if i != -1: + document.header[i] = "\\language english" + j = find_token(document.header, "\\language_package default", 0) + if j != -1: + document.header[j] = "\\language_package default" + if with_polyglossia: + add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame]) + document.body[2 : 2] = ["\\begin_layout Standard", + "\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "resetdefaultlanguage{%s}" % polyglossianame, + "\\end_layout", "", "\\end_inset", "", "", + "\\end_layout", ""] + + # Now secondary languages + i = 0 + while True: + i = find_token(document.body, '\\lang', i) + if i == -1: + break + if document.body[i].startswith('\\lang %s' % lyxname): + secondary = True + endlang = get_containing_layout(document.body, i)[2] + langswitch = find_token(document.body, '\\lang', i + 1, endlang) + startlayout = "\\begin_layout Standard" + endlayout = "\\end_layout" + if langswitch != -1: + endlang = langswitch + startlayout = "" + endlayout = "" + if with_polyglossia: + add_to_preamble(document, ["\\AtBeginDocument{\setotherlanguage{%s}}" % polyglossianame]) + document.body[endlang : endlang] = [startlayout, + "\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "end{%s}" % polyglossianame, + "\\end_layout", "", "\\end_inset", "", "", + endlayout, ""] + elif with_babel: + document.body[endlang : endlang] = [startlayout, + "\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "end{otherlanguage}", + "\\end_layout", "", "\\end_inset", "", "", + endlayout, ""] + del document.body[i] + if with_polyglossia: + document.body[i : i] = ["\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "begin{%s}" % polyglossianame, + "\\end_layout", "", "\\end_inset", "", "", + ""] + elif with_babel: + document.body[i : i] = ["\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "begin{otherlanguage}{%s}" % babelname, + "\\end_layout", "", "\\end_inset", "", "", + ""] + elif primary and document.body[i].startswith('\\lang english'): + # Since we switched the main language manually, English parts need to be marked + endlang = get_containing_layout(document.body, i)[2] + langswitch = find_token(document.body, '\\lang', i + 1, endlang) + startlayout = "\\begin_layout Standard" + endlayout = "\\end_layout" + if langswitch != -1: + endlang = langswitch + startlayout = "" + endlayout = "" + if with_polyglossia: + parent = get_containing_layout(document.body, i) + document.body[endlang : endlang] = [startlayout, + "\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "end{english}", + "\\end_layout", "", "\\end_inset", "", "", + endlayout, ""] + elif with_babel: + parent = get_containing_layout(document.body, i) + document.body[endlang : endlang] = [startlayout, + "\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "end{otherlanguage}", + "\\end_layout", "", "\\end_inset", "", "", + endlayout, ""] + del document.body[i] + if with_polyglossia: + document.body[i : i] = ["\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "begin{english}", + "\\end_layout", "", "\\end_inset", "", "", + ""] + elif with_babel: + document.body[i : i] = ["\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "begin{otherlanguage}{english}", + "\\end_layout", "", "\\end_inset", "", "", + ""] + else: + i += 1 + + # With babel, we need to add the language options + if with_babel and (primary or secondary): + insert_document_option(document, babelname) + if secondary and document.body[10] != "selectlanguage{%s}" % orig_doc_language: + # Since the user options are always placed after the babel options, + # we need to reset the main language + document.body[2 : 2] = ["\\begin_layout Standard", + "\\begin_inset ERT", "status open", "", + "\\begin_layout Plain Layout", "", "", + "\\backslash", + "selectlanguage{%s}" % orig_doc_language, + "\\end_layout", "", "\\end_inset", "", "", + "\\end_layout", ""] +