X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx_1_5.py;h=d3de88f9981148e5d3b50d27f038f5bbc944e965;hb=9da74fe2078e24e1e7891784ecbfe33ff77e7f85;hp=78f703a7629693b209b6b88d5fe1fde1c4382d14;hpb=c267eec1c91bcb7151a37c7355c569cd845f7cc2;p=lyx.git diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py index 78f703a762..d3de88f998 100644 --- a/lib/lyx2lyx/lyx_1_5.py +++ b/lib/lyx2lyx/lyx_1_5.py @@ -1,6 +1,6 @@ # This file is part of lyx2lyx -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2006 José Matos +# -*- coding: utf-8 -*- +# Copyright (C) 2006 José Matos # Copyright (C) 2004-2006 Georg Baum # # This program is free software; you can redistribute it and/or @@ -15,25 +15,50 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +""" Convert files to the file format generated by lyx 1.5""" import re -from parser_tools import find_token, find_token_exact, find_tokens, find_end_of_inset, get_value -from string import replace +import unicodedata +import sys, os + +from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line +from LyX import get_encoding + + +#################################################################### +# Private helper functions + +def find_end_of_inset(lines, i): + " Find end of inset, where lines[i] is included." + return find_end_of(lines, i, "\\begin_inset", "\\end_inset") + +def find_end_of_layout(lines, i): + " Find end of layout, where lines[i] is included." + return find_end_of(lines, i, "\\begin_layout", "\\end_layout") + +def find_beginning_of_layout(lines, i): + "Find beginning of layout, where lines[i] is included." 
+ return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout") + +# End of helper functions +#################################################################### ## # Notes: Framed/Shaded # -def revert_framed(file): +def revert_framed(document): + "Revert framed notes. " i = 0 while 1: - i = find_tokens(file.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i) + i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i) if i == -1: return - file.body[i] = "\\begin_inset Note" + document.body[i] = "\\begin_inset Note" i = i + 1 @@ -57,20 +82,21 @@ typewriter_fonts = {'default' : 'default', 'ae' : 'default', 'newcent' : 'default', 'bookman' : 'default', 'pslatex' : 'courier'} -def convert_font_settings(file): +def convert_font_settings(document): + " Convert font settings. " i = 0 - i = find_token_exact(file.header, "\\fontscheme", i) + i = find_token_exact(document.header, "\\fontscheme", i) if i == -1: - file.warning("Malformed LyX file: Missing `\\fontscheme'.") + document.warning("Malformed LyX document: Missing `\\fontscheme'.") return - font_scheme = get_value(file.header, "\\fontscheme", i, i + 1) + font_scheme = get_value(document.header, "\\fontscheme", i, i + 1) if font_scheme == '': - file.warning("Malformed LyX file: Empty `\\fontscheme'.") + document.warning("Malformed LyX document: Empty `\\fontscheme'.") font_scheme = 'default' if not font_scheme in roman_fonts.keys(): - file.warning("Malformed LyX file: Unknown `\\fontscheme' `%s'." % font_scheme) + document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." 
% font_scheme) font_scheme = 'default' - file.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme], + document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme], '\\font_sans %s' % sans_fonts[font_scheme], '\\font_typewriter %s' % typewriter_fonts[font_scheme], '\\font_default_family default', @@ -80,124 +106,1920 @@ def convert_font_settings(file): '\\font_tt_scale 100'] -def revert_font_settings(file): +def revert_font_settings(document): + " Revert font settings. " i = 0 insert_line = -1 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'} for family in 'roman', 'sans', 'typewriter': name = '\\font_%s' % family - i = find_token_exact(file.header, name, i) + i = find_token_exact(document.header, name, i) if i == -1: - file.warning("Malformed LyX file: Missing `%s'." % name) + document.warning("Malformed LyX document: Missing `%s'." % name) i = 0 else: if (insert_line < 0): insert_line = i - fonts[family] = get_value(file.header, name, i, i + 1) - del file.header[i] - i = find_token_exact(file.header, '\\font_default_family', i) + fonts[family] = get_value(document.header, name, i, i + 1) + del document.header[i] + i = find_token_exact(document.header, '\\font_default_family', i) if i == -1: - file.warning("Malformed LyX file: Missing `\\font_default_family'.") + document.warning("Malformed LyX document: Missing `\\font_default_family'.") font_default_family = 'default' else: - font_default_family = get_value(file.header, "\\font_default_family", i, i + 1) - del file.header[i] - i = find_token_exact(file.header, '\\font_sc', i) + font_default_family = get_value(document.header, "\\font_default_family", i, i + 1) + del document.header[i] + i = find_token_exact(document.header, '\\font_sc', i) if i == -1: - file.warning("Malformed LyX file: Missing `\\font_sc'.") + document.warning("Malformed LyX document: Missing `\\font_sc'.") font_sc = 'false' else: - font_sc = get_value(file.header, '\\font_sc', i, i + 1) - del 
file.header[i] + font_sc = get_value(document.header, '\\font_sc', i, i + 1) + del document.header[i] if font_sc != 'false': - file.warning("Conversion of '\\font_sc' not yet implemented.") - i = find_token_exact(file.header, '\\font_osf', i) + document.warning("Conversion of '\\font_sc' not yet implemented.") + i = find_token_exact(document.header, '\\font_osf', i) if i == -1: - file.warning("Malformed LyX file: Missing `\\font_osf'.") + document.warning("Malformed LyX document: Missing `\\font_osf'.") font_osf = 'false' else: - font_osf = get_value(file.header, '\\font_osf', i, i + 1) - del file.header[i] - i = find_token_exact(file.header, '\\font_sf_scale', i) + font_osf = get_value(document.header, '\\font_osf', i, i + 1) + del document.header[i] + i = find_token_exact(document.header, '\\font_sf_scale', i) if i == -1: - file.warning("Malformed LyX file: Missing `\\font_sf_scale'.") + document.warning("Malformed LyX document: Missing `\\font_sf_scale'.") font_sf_scale = '100' else: - font_sf_scale = get_value(file.header, '\\font_sf_scale', i, i + 1) - del file.header[i] + font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1) + del document.header[i] if font_sf_scale != '100': - file.warning("Conversion of '\\font_sf_scale' not yet implemented.") - i = find_token_exact(file.header, '\\font_tt_scale', i) + document.warning("Conversion of '\\font_sf_scale' not yet implemented.") + i = find_token_exact(document.header, '\\font_tt_scale', i) if i == -1: - file.warning("Malformed LyX file: Missing `\\font_tt_scale'.") + document.warning("Malformed LyX document: Missing `\\font_tt_scale'.") font_tt_scale = '100' else: - font_tt_scale = get_value(file.header, '\\font_tt_scale', i, i + 1) - del file.header[i] + font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1) + del document.header[i] if font_tt_scale != '100': - file.warning("Conversion of '\\font_tt_scale' not yet implemented.") + document.warning("Conversion of 
'\\font_tt_scale' not yet implemented.") for font_scheme in roman_fonts.keys(): if (roman_fonts[font_scheme] == fonts['roman'] and sans_fonts[font_scheme] == fonts['sans'] and typewriter_fonts[font_scheme] == fonts['typewriter']): - file.header.insert(insert_line, '\\fontscheme %s' % font_scheme) + document.header.insert(insert_line, '\\fontscheme %s' % font_scheme) if font_default_family != 'default': - file.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family) + document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family) if font_osf == 'true': - file.warning("Ignoring `\\font_osf = true'") + document.warning("Ignoring `\\font_osf = true'") return font_scheme = 'default' - file.header.insert(insert_line, '\\fontscheme %s' % font_scheme) + document.header.insert(insert_line, '\\fontscheme %s' % font_scheme) if fonts['roman'] == 'cmr': - file.preamble.append('\\renewcommand{\\rmdefault}{cmr}') + document.preamble.append('\\renewcommand{\\rmdefault}{cmr}') if font_osf == 'true': - file.preamble.append('\\usepackage{eco}') + document.preamble.append('\\usepackage{eco}') font_osf = 'false' for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery': if fonts['roman'] == font: - file.preamble.append('\\usepackage{%s}' % font) + document.preamble.append('\\usepackage{%s}' % font) for font in 'cmss', 'lmss', 'cmbr': if fonts['sans'] == font: - file.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font) + document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font) for font in 'berasans': if fonts['sans'] == font: - file.preamble.append('\\usepackage{%s}' % font) + document.preamble.append('\\usepackage{%s}' % font) for font in 'cmtt', 'lmtt', 'cmtl': if fonts['typewriter'] == font: - file.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font) + document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font) for font in 'courier', 'beramono', 'luximono': if fonts['typewriter'] == font: - 
file.preamble.append('\\usepackage{%s}' % font) + document.preamble.append('\\usepackage{%s}' % font) if font_default_family != 'default': - file.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family) + document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family) if font_osf == 'true': - file.warning("Ignoring `\\font_osf = true'") + document.warning("Ignoring `\\font_osf = true'") -def revert_booktabs(file): -# we just remove the booktabs flag, everything else will become a mess. +def revert_booktabs(document): + " We remove the booktabs flag or everything else will become a mess. " re_row = re.compile(r'^$') re_tspace = re.compile(r'\s+topspace="[^"]+"') re_bspace = re.compile(r'\s+bottomspace="[^"]+"') re_ispace = re.compile(r'\s+interlinespace="[^"]+"') i = 0 while 1: - i = find_token(file.body, "\\begin_inset Tabular", i) + i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: return - j = find_end_of_inset(file.body, i + 1) + j = find_end_of_inset(document.body, i + 1) if j == -1: - file.warning("Malformed LyX file: Could not find end of tabular.") + document.warning("Malformed LyX document: Could not find end of tabular.") continue for k in range(i, j): - if re.search('^$', file.body[k]): - file.warning("Converting 'booktabs' table to normal table.") - file.body[k] = replace(file.body[k], ' booktabs="true"', '') - if re.search(re_row, file.body[k]): - file.warning("Removing extra row space.") - file.body[k] = re_tspace.sub('', file.body[k]) - file.body[k] = re_bspace.sub('', file.body[k]) - file.body[k] = re_ispace.sub('', file.body[k]) + if re.search('^$', document.body[k]): + document.warning("Converting 'booktabs' table to normal table.") + document.body[k] = document.body[k].replace(' booktabs="true"', '') + if re.search(re_row, document.body[k]): + document.warning("Removing extra row space.") + document.body[k] = re_tspace.sub('', document.body[k]) + document.body[k] = re_bspace.sub('', 
document.body[k]) + document.body[k] = re_ispace.sub('', document.body[k]) + i = i + 1 + + +def convert_multiencoding(document, forward): + """ Fix files with multiple encodings. +Files with an inputencoding of "auto" or "default" and multiple languages +where at least two languages have different default encodings are encoded +in multiple encodings for file formats < 249. These files are incorrectly +read and written (as if the whole file was in the encoding of the main +language). +This is not true for files written by CJK-LyX, they are always in the locale +encoding. + +This function +- converts from fake unicode values to true unicode if forward is true, and +- converts from true unicode values to fake unicode if forward is false. +document.encoding must be set to the old value (format 248) in both cases. + +We do this here and not in LyX.py because it is far easier to do the +necessary parsing in modern formats than in ancient ones. +""" + inset_types = ["Foot", "Note"] + if document.cjk_encoding != '': + return + encoding_stack = [document.encoding] + insets = [] + lang_re = re.compile(r"^\\lang\s(\S+)") + inset_re = re.compile(r"^\\begin_inset\s(\S+)") + if not forward: # no need to read file unless we are reverting + spec_chars = read_unicodesymbols() + + if document.inputencoding == "auto" or document.inputencoding == "default": + i = 0 + while i < len(document.body): + result = lang_re.match(document.body[i]) + if result: + language = result.group(1) + if language == "default": + document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3) + encoding_stack[-1] = document.encoding + else: + from lyx2lyx_lang import lang + document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3) + encoding_stack[-1] = lang[language][3] + elif find_token(document.body, "\\begin_layout", i, i + 1) == i: + document.warning("Adding nested encoding %s." 
% encoding_stack[-1], 3) + if len(insets) > 0 and insets[-1] in inset_types: + from lyx2lyx_lang import lang + encoding_stack.append(lang[document.language][3]) + else: + encoding_stack.append(encoding_stack[-1]) + elif find_token(document.body, "\\end_layout", i, i + 1) == i: + document.warning("Removing nested encoding %s." % encoding_stack[-1], 3) + if len(encoding_stack) == 1: + # Don't remove the document encoding from the stack + document.warning("Malformed LyX document: Unexpected `\\end_layout'.") + else: + del encoding_stack[-1] + elif find_token(document.body, "\\begin_inset", i, i + 1) == i: + inset_result = inset_re.match(document.body[i]) + if inset_result: + insets.append(inset_result.group(1)) + else: + insets.append("") + elif find_token(document.body, "\\end_inset", i, i + 1) == i: + del insets[-1] + if encoding_stack[-1] != document.encoding: + if forward: + # This line has been incorrectly interpreted as if it was + # encoded in 'encoding'. + # Convert back to the 8bit string that was in the file. + orig = document.body[i].encode(document.encoding) + # Convert the 8bit string that was in the file to unicode + # with the correct encoding. + document.body[i] = orig.decode(encoding_stack[-1]) + else: + try: + # Convert unicode to the 8bit string that will be written + # to the file with the correct encoding. + orig = document.body[i].encode(encoding_stack[-1]) + # Convert the 8bit string that will be written to the + # file to fake unicode with the encoding that will later + # be used when writing to the file. + document.body[i] = orig.decode(document.encoding) + except: + mod_line = revert_unicode_line(document, i, insets, spec_chars) + document.body[i:i+1] = mod_line.split('\n') + i += len(mod_line.split('\n')) - 1 + i += 1 + + +def convert_utf8(document): + " Set document encoding to UTF-8. 
" + convert_multiencoding(document, True) + document.encoding = "utf8" + + +def revert_utf8(document): + " Set document encoding to the value corresponding to inputencoding. " + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + elif get_value(document.header, "\\inputencoding", i) == "utf8": + document.header[i] = "\\inputencoding auto" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding) + convert_multiencoding(document, False) + + +def read_unicodesymbols(): + " Read the unicodesymbols list of unicode characters and corresponding commands." + pathname = os.path.abspath(os.path.dirname(sys.argv[0])) + fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')) + spec_chars = {} + for line in fp.readlines(): + if line[0] != '#': + line=line.replace(' "',' ') # remove all quotation marks with spaces before + line=line.replace('" ',' ') # remove all quotation marks with spaces after + line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis) + try: + # flag1 and flag2 are preamble and other flags + [ucs4,command,flag1,flag2] =line.split(None,3) + spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2] + except: + pass + fp.close() + return spec_chars + + +def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'): + # Define strings to start and end ERT and math insets + ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout + ert_outro='\n\\end_layout\n\n\\end_inset\n' + math_intro='\n\\begin_inset Formula $' + math_outro='$\n\\end_inset' + + mod_line = u'' + if i and not is_inset_line(document, i-1): + last_char = document.body[i - 1][-1:] + else: + last_char = '' + + line = document.body[i] + for character in line: + try: + # Try to write the character + 
dummy = character.encode(document.encoding) + mod_line += character + last_char = character + except: + # Try to replace with ERT/math inset + if spec_chars.has_key(character): + command = spec_chars[character][0] # the command to replace unicode + flag1 = spec_chars[character][1] + flag2 = spec_chars[character][2] + if flag1.find('combining') > -1 or flag2.find('combining') > -1: + # We have a character that should be combined with the previous + command += '{' + last_char + '}' + # Remove the last character. Ignore if it is whitespace + if len(last_char.rstrip()): + # last_char was found and is not whitespace + if mod_line: + mod_line = mod_line[:-1] + else: # last_char belongs to the last line + document.body[i-1] = document.body[i-1][:-1] + else: + # The last character was replaced by a command. For now it is + # ignored. This could be handled better. + pass + if command[0:2] == '\\\\': + if command[2:12]=='ensuremath': + if insets and insets[-1] == "ERT": + # math in ERT + command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n') + command = command.replace('}', '$\n') + elif not insets or insets[-1] != "Formula": + # add a math inset with the replacement character + command = command.replace('\\\\ensuremath{\\', math_intro) + command = command.replace('}', math_outro) + else: + # we are already in a math inset + command = command.replace('\\\\ensuremath{\\', '') + command = command.replace('}', '') + else: + if insets and insets[-1] == "Formula": + # avoid putting an ERT in a math; instead put command as text + command = command.replace('\\\\', '\mathrm{') + command = command + '}' + elif not insets or insets[-1] != "ERT": + # add an ERT inset with the replacement character + command = command.replace('\\\\', ert_intro) + command = command + ert_outro + else: + command = command.replace('\\\\', '\n\\backslash\n') + last_char = '' # indicate that the character should not be removed + mod_line += command + else: + # Replace with replacement string + 
mod_line += replacement_character + return mod_line + + +def revert_unicode(document): + '''Transform unicode characters that can not be written using the +document encoding to commands according to the unicodesymbols +file. Characters that can not be replaced by commands are replaced by +an replacement string. Flags other than 'combined' are currently not +implemented.''' + spec_chars = read_unicodesymbols() + insets = [] # list of active insets + + # Go through the document to capture all combining characters + i = 0 + while i < len(document.body): + line = document.body[i] + # Check for insets + if line.find('\\begin_inset') > -1: + insets.append(line[13:].split()[0]) + if line.find('\\end_inset') > -1: + del insets[-1] + + # Try to write the line + try: + # If all goes well the line is written here + dummy = line.encode(document.encoding) + i += 1 + except: + # Error, some character(s) in the line need to be replaced + mod_line = revert_unicode_line(document, i, insets, spec_chars) + document.body[i:i+1] = mod_line.split('\n') + i += len(mod_line.split('\n')) + + +def revert_cs_label(document): + " Remove status flag of charstyle label. " + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset CharStyle", i) + if i == -1: + return + # Seach for a line starting 'show_label' + # If it is not there, break with a warning message + i = i + 1 + while 1: + if (document.body[i][:10] == "show_label"): + del document.body[i] + break + elif (document.body[i][:13] == "\\begin_layout"): + document.warning("Malformed LyX document: Missing 'show_label'.") + break + i = i + 1 + + i = i + 1 + + +def convert_bibitem(document): + """ Convert +\bibitem [option]{argument} + +to + +\begin_inset LatexCommand bibitem +label "option" +key "argument" + +\end_inset + +This must be called after convert_commandparams. 
+""" + i = 0 + while 1: + i = find_token(document.body, "\\bibitem", i) + if i == -1: + break + j = document.body[i].find('[') + 1 + k = document.body[i].rfind(']') + if j == 0: # No optional argument found + option = None + else: + option = document.body[i][j:k] + j = document.body[i].rfind('{') + 1 + k = document.body[i].rfind('}') + argument = document.body[i][j:k] + lines = ['\\begin_inset LatexCommand bibitem'] + if option != None: + lines.append('label "%s"' % option.replace('"', '\\"')) + lines.append('key "%s"' % argument.replace('"', '\\"')) + lines.append('') + lines.append('\\end_inset') + document.body[i:i+1] = lines + i = i + 1 + + +commandparams_info = { + # command : [option1, option2, argument] + "bibitem" : ["label", "", "key"], + "bibtex" : ["options", "btprint", "bibfiles"], + "cite" : ["after", "before", "key"], + "citet" : ["after", "before", "key"], + "citep" : ["after", "before", "key"], + "citealt" : ["after", "before", "key"], + "citealp" : ["after", "before", "key"], + "citeauthor" : ["after", "before", "key"], + "citeyear" : ["after", "before", "key"], + "citeyearpar" : ["after", "before", "key"], + "citet*" : ["after", "before", "key"], + "citep*" : ["after", "before", "key"], + "citealt*" : ["after", "before", "key"], + "citealp*" : ["after", "before", "key"], + "citeauthor*" : ["after", "before", "key"], + "Citet" : ["after", "before", "key"], + "Citep" : ["after", "before", "key"], + "Citealt" : ["after", "before", "key"], + "Citealp" : ["after", "before", "key"], + "Citeauthor" : ["after", "before", "key"], + "Citet*" : ["after", "before", "key"], + "Citep*" : ["after", "before", "key"], + "Citealt*" : ["after", "before", "key"], + "Citealp*" : ["after", "before", "key"], + "Citeauthor*" : ["after", "before", "key"], + "citefield" : ["after", "before", "key"], + "citetitle" : ["after", "before", "key"], + "cite*" : ["after", "before", "key"], + "hfill" : ["", "", ""], + "index" : ["", "", "name"], + "printindex" : ["", "", "name"], + 
"label" : ["", "", "name"], + "eqref" : ["name", "", "reference"], + "pageref" : ["name", "", "reference"], + "prettyref" : ["name", "", "reference"], + "ref" : ["name", "", "reference"], + "vpageref" : ["name", "", "reference"], + "vref" : ["name", "", "reference"], + "tableofcontents" : ["", "", "type"], + "htmlurl" : ["name", "", "target"], + "url" : ["name", "", "target"]} + + +def convert_commandparams(document): + """ Convert + + \begin_inset LatexCommand \cmdname[opt1][opt2]{arg} + \end_inset + + to + + \begin_inset LatexCommand cmdname + name1 "opt1" + name2 "opt2" + name3 "arg" + \end_inset + + name1, name2 and name3 can be different for each command. +""" + # \begin_inset LatexCommand bibitem was not the official version (see + # convert_bibitem()), but could be read in, so we convert it here, too. + + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset LatexCommand", i) + if i == -1: + break + command = document.body[i][26:].strip() + if command == "": + document.warning("Malformed LyX document: Missing LatexCommand name.") + i = i + 1 + continue + + j = find_token(document.body, "\\end_inset", i + 1) + if j == -1: + document.warning("Malformed document") + else: + command += "".join(document.body[i+1:j]) + document.body[i+1:j] = [] + + # The following parser is taken from the original InsetCommandParams::scanCommand + name = "" + option1 = "" + option2 = "" + argument = "" + state = "WS" + # Used to handle things like \command[foo[bar]]{foo{bar}} + nestdepth = 0 + b = 0 + for c in command: + if ((state == "CMDNAME" and c == ' ') or + (state == "CMDNAME" and c == '[') or + (state == "CMDNAME" and c == '{')): + state = "WS" + if ((state == "OPTION" and c == ']') or + (state == "SECOPTION" and c == ']') or + (state == "CONTENT" and c == '}')): + if nestdepth == 0: + state = "WS" + else: + nestdepth = nestdepth - 1 + if ((state == "OPTION" and c == '[') or + (state == "SECOPTION" and c == '[') or + (state == "CONTENT" and c == '{')): + nestdepth 
= nestdepth + 1 + if state == "CMDNAME": + name += c + elif state == "OPTION": + option1 += c + elif state == "SECOPTION": + option2 += c + elif state == "CONTENT": + argument += c + elif state == "WS": + if c == '\\': + state = "CMDNAME" + elif c == '[' and b != ']': + state = "OPTION" + nestdepth = 0 # Just to be sure + elif c == '[' and b == ']': + state = "SECOPTION" + nestdepth = 0 # Just to be sure + elif c == '{': + state = "CONTENT" + nestdepth = 0 # Just to be sure + b = c + + # Now we have parsed the command, output the parameters + lines = ["\\begin_inset LatexCommand %s" % name] + if option1 != "": + if commandparams_info[name][0] == "": + document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name)) + else: + lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"'))) + if option2 != "": + if commandparams_info[name][1] == "": + document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name)) + else: + lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"'))) + if argument != "": + if commandparams_info[name][2] == "": + document.warning("Ignoring invalid argument `%s' of command `%s'." 
% (argument, name)) + else: + lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"'))) + document.body[i:i+1] = lines + i = i + 1 + + +def revert_commandparams(document): + regex = re.compile(r'(\S+)\s+(.+)') + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset LatexCommand", i) + if i == -1: + break + name = document.body[i].split()[2] + j = find_end_of_inset(document.body, i) + preview_line = "" + option1 = "" + option2 = "" + argument = "" + for k in range(i + 1, j): + match = re.match(regex, document.body[k]) + if match: + pname = match.group(1) + pvalue = match.group(2) + if pname == "preview": + preview_line = document.body[k] + elif (commandparams_info[name][0] != "" and + pname == commandparams_info[name][0]): + option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\') + elif (commandparams_info[name][1] != "" and + pname == commandparams_info[name][1]): + option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\') + elif (commandparams_info[name][2] != "" and + pname == commandparams_info[name][2]): + argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\') + elif document.body[k].strip() != "": + document.warning("Ignoring unknown contents `%s' in command inset %s." 
% (document.body[k], name)) + if name == "bibitem": + if option1 == "": + lines = ["\\bibitem {%s}" % argument] + else: + lines = ["\\bibitem [%s]{%s}" % (option1, argument)] + else: + if option1 == "": + if option2 == "": + lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)] + else: + lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)] + else: + if option2 == "": + lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)] + else: + lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)] + if name != "bibitem": + if preview_line != "": + lines.append(preview_line) + lines.append('') + lines.append('\\end_inset') + document.body[i:j+1] = lines + i += len(lines) + 1 + + +def revert_nomenclature(document): + " Convert nomenclature entry to ERT. " + regex = re.compile(r'(\S+)\s+(.+)') + i = 0 + use_nomencl = 0 + while 1: + i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i) + if i == -1: + break + use_nomencl = 1 + j = find_end_of_inset(document.body, i + 1) + preview_line = "" + symbol = "" + description = "" + prefix = "" + for k in range(i + 1, j): + match = re.match(regex, document.body[k]) + if match: + name = match.group(1) + value = match.group(2) + if name == "preview": + preview_line = document.body[k] + elif name == "symbol": + symbol = value.strip('"').replace('\\"', '"') + elif name == "description": + description = value.strip('"').replace('\\"', '"') + elif name == "prefix": + prefix = value.strip('"').replace('\\"', '"') + elif document.body[k].strip() != "": + document.warning("Ignoring unknown contents `%s' in nomenclature inset." 
% document.body[k]) + if prefix == "": + command = 'nomenclature{%s}{%s}' % (symbol, description) + else: + command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description) + document.body[i:j+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + command, + '\\end_layout', + '', + '\\end_inset'] + i = i + 11 + if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1: + document.preamble.append('\\usepackage{nomencl}[2005/09/22]') + document.preamble.append('\\makenomenclature') + + +def revert_printnomenclature(document): + " Convert printnomenclature to ERT. " + regex = re.compile(r'(\S+)\s+(.+)') + i = 0 + use_nomencl = 0 + while 1: + i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i) + if i == -1: + break + use_nomencl = 1 + j = find_end_of_inset(document.body, i + 1) + preview_line = "" + labelwidth = "" + for k in range(i + 1, j): + match = re.match(regex, document.body[k]) + if match: + name = match.group(1) + value = match.group(2) + if name == "preview": + preview_line = document.body[k] + elif name == "labelwidth": + labelwidth = value.strip('"').replace('\\"', '"') + elif document.body[k].strip() != "": + document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k]) + if labelwidth == "": + command = 'nomenclature{}' + else: + command = 'nomenclature[%s]' % labelwidth + document.body[i:j+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + command, + '\\end_layout', + '', + '\\end_inset'] + i = i + 11 + if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1: + document.preamble.append('\\usepackage{nomencl}[2005/09/22]') + document.preamble.append('\\makenomenclature') + + +def convert_esint(document): + " Add \\use_esint setting to header. 
" + i = find_token(document.header, "\\cite_engine", 0) + if i == -1: + document.warning("Malformed LyX document: Missing `\\cite_engine'.") + return + # 0 is off, 1 is auto, 2 is on. + document.header.insert(i, '\\use_esint 0') + + +def revert_esint(document): + " Remove \\use_esint setting from header. " + i = find_token(document.header, "\\use_esint", 0) + if i == -1: + document.warning("Malformed LyX document: Missing `\\use_esint'.") + return + use_esint = document.header[i].split()[1] + del document.header[i] + # 0 is off, 1 is auto, 2 is on. + if (use_esint == 2): + document.preamble.append('\\usepackage{esint}') + + +def revert_clearpage(document): + " clearpage -> ERT " + i = 0 + while 1: + i = find_token(document.body, "\\clearpage", i) + if i == -1: + break + document.body[i:i+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + 'clearpage', + '\\end_layout', + '', + '\\end_inset'] + i = i + 1 + + +def revert_cleardoublepage(document): + " cleardoublepage -> ERT " + i = 0 + while 1: + i = find_token(document.body, "\\cleardoublepage", i) + if i == -1: + break + document.body[i:i+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + 'cleardoublepage', + '\\end_layout', + '', + '\\end_inset'] + i = i + 1 + + +def convert_lyxline(document): + " remove fontsize commands for \lyxline " + # The problematic is: The old \lyxline definition doesn't handle the fontsize + # to change the line thickness. The new definiton does this so that imported + # \lyxlines would have a different line thickness. The eventual fontsize command + # before \lyxline is therefore removed to get the same output. 
+ fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize", + "large", "Large", "LARGE", "huge", "Huge"] + for n in range(0, len(fontsizes)): + i = 0 + k = 0 + while i < len(document.body): + i = find_token(document.body, "\\size " + fontsizes[n], i) + k = find_token(document.body, "\\lyxline", i) + # the corresponding fontsize command is always 2 lines before the \lyxline + if (i != -1 and k == i+2): + document.body[i:i+1] = [] + else: + break + i = i + 1 + + +def revert_encodings(document): + " Set new encodings to auto. " + encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852", + "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250", + "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"] + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + else: + inputenc = get_value(document.header, "\\inputencoding", i) + if inputenc in encodings: + document.header[i] = "\\inputencoding auto" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + + +def convert_caption(document): + " Convert caption layouts to caption insets. " + i = 0 + while 1: + i = find_token(document.body, "\\begin_layout Caption", i) + if i == -1: + return + j = find_end_of_layout(document.body, i) + if j == -1: + document.warning("Malformed LyX document: Missing `\\end_layout'.") + return + + document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""] + document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout, + "\\begin_inset Caption", "", + "\\begin_layout %s" % document.default_layout] + i = i + 1 + + +def revert_caption(document): + " Convert caption insets to caption layouts. " + " This assumes that the text class has a caption style. 
" + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset Caption", i) + if i == -1: + return + + # We either need to delete the previous \begin_layout line, or we + # need to end the previous layout if this inset is not in the first + # position of the paragraph. + layout_before = find_token_backwards(document.body, "\\begin_layout", i) + if layout_before == -1: + document.warning("Malformed LyX document: Missing `\\begin_layout'.") + return + layout_line = document.body[layout_before] + del_layout_before = True + l = layout_before + 1 + while l < i: + if document.body[l] != "": + del_layout_before = False + break + l = l + 1 + if del_layout_before: + del document.body[layout_before:i] + i = layout_before + else: + document.body[i:i] = ["\\end_layout", ""] + i = i + 2 + + # Find start of layout in the inset and end of inset + j = find_token(document.body, "\\begin_layout", i) + if j == -1: + document.warning("Malformed LyX document: Missing `\\begin_layout'.") + return + k = find_end_of_inset(document.body, i) + if k == -1: + document.warning("Malformed LyX document: Missing `\\end_inset'.") + return + + # We either need to delete the following \end_layout line, or we need + # to restart the old layout if this inset is not at the paragraph end. + layout_after = find_token(document.body, "\\end_layout", k) + if layout_after == -1: + document.warning("Malformed LyX document: Missing `\\end_layout'.") + return + del_layout_after = True + l = k + 1 + while l < layout_after: + if document.body[l] != "": + del_layout_after = False + break + l = l + 1 + if del_layout_after: + del document.body[k+1:layout_after+1] + else: + document.body[k+1:k+1] = [layout_line, ""] + + # delete \begin_layout and \end_inset and replace \begin_inset with + # "\begin_layout Caption". This works because we can only have one + # paragraph in the caption inset: The old \end_layout will be recycled. 
+ del document.body[k] + if document.body[k] == "": + del document.body[k] + del document.body[j] + if document.body[j] == "": + del document.body[j] + document.body[i] = "\\begin_layout Caption" + if document.body[i+1] == "": + del document.body[i+1] + i = i + 1 + + +# Accents of InsetLaTeXAccent +accent_map = { + "`" : u'\u0300', # grave + "'" : u'\u0301', # acute + "^" : u'\u0302', # circumflex + "~" : u'\u0303', # tilde + "=" : u'\u0304', # macron + "u" : u'\u0306', # breve + "." : u'\u0307', # dot above + "\"": u'\u0308', # diaeresis + "r" : u'\u030a', # ring above + "H" : u'\u030b', # double acute + "v" : u'\u030c', # caron + "b" : u'\u0320', # minus sign below + "d" : u'\u0323', # dot below + "c" : u'\u0327', # cedilla + "k" : u'\u0328', # ogonek + "t" : u'\u0361' # tie. This is special: It spans two characters, but + # only one is given as argument, so we don't need to + # treat it differently. +} + + +# special accents of InsetLaTeXAccent without argument +special_accent_map = { + 'i' : u'\u0131', # dotless i + 'j' : u'\u0237', # dotless j + 'l' : u'\u0142', # l with stroke + 'L' : u'\u0141' # L with stroke +} + + +# special accent arguments of InsetLaTeXAccent +accented_map = { + '\\i' : u'\u0131', # dotless i + '\\j' : u'\u0237' # dotless j +} + + +def _convert_accent(accent, accented_char): + type = accent + char = accented_char + if char == '': + if type in special_accent_map: + return special_accent_map[type] + # a missing char is treated as space by LyX + char = ' ' + elif type == 'q' and char in ['t', 'd', 'l', 'L']: + # Special caron, only used with t, d, l and L. + # It is not in the map because we convert it to the same unicode + # character as the normal caron: \q{} is only defined if babel with + # the czech or slovak language is used, and the normal caron + # produces the correct output if the T1 font encoding is used. + # For the same reason we never convert to \q{} in the other direction. 
+ type = 'v' + elif char in accented_map: + char = accented_map[char] + elif (len(char) > 1): + # We can only convert accents on a single char + return '' + a = accent_map.get(type) + if a: + return unicodedata.normalize("NFC", "%s%s" % (char, a)) + return '' + + +def convert_ertbackslash(body, i, ert, default_layout): + r""" ------------------------------------------------------------------------------------------- + Convert backslashes and '\n' into valid ERT code, append the converted + text to body[i] and return the (maybe incremented) line index i""" + + for c in ert: + if c == '\\': + body[i] = body[i] + '\\backslash ' + i = i + 1 + body.insert(i, '') + elif c == '\n': + body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, ''] + i = i + 4 + else: + body[i] = body[i] + c + return i + + +def convert_accent(document): + # The following forms are supported by LyX: + # '\i \"{a}' (standard form, as written by LyX) + # '\i \"{}' (standard form, as written by LyX if the accented char is a space) + # '\i \"{ }' (also accepted if the accented char is a space) + # '\i \" a' (also accepted) + # '\i \"' (also accepted) + re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$') + re_contents = re.compile(r'^([^\s{]+)(.*)$') + re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$') + i = 0 + while 1: + i = find_re(document.body, re_wholeinset, i) + if i == -1: + return + match = re_wholeinset.match(document.body[i]) + prefix = match.group(1) + contents = match.group(3).strip() + match = re_contents.match(contents) + if match: + # Strip first char (always \) + accent = match.group(1)[1:] + accented_contents = match.group(2).strip() + match = re_accentedcontents.match(accented_contents) + accented_char = match.group(1) + converted = _convert_accent(accent, accented_char) + if converted == '': + # Normalize contents + contents = '%s{%s}' % (accent, accented_char), + else: + document.body[i] = '%s%s' % (prefix, converted) + i += 1 + continue + 
document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents) + document.body[i] = prefix + document.body[i+1:i+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + ''] + i = convert_ertbackslash(document.body, i + 7, + '\\%s' % contents, + document.default_layout) + document.body[i+1:i+1] = ['\\end_layout', + '', + '\\end_inset'] + i += 3 + + +def is_inset_line(document, i): + """ Line i of body has an inset """ + if document.body[i][:1] == '\\': + return True + last_tokens = "".join(document.body[i].split()[-2:]) + return last_tokens.find('\\') != -1 + + +# A wrapper around normalize that handles special cases (cf. bug 3313) +def normalize(form, text): + # do not normalize OHM, ANGSTROM + keep_characters = [0x2126,0x212b] + result = '' + convert = '' + for i in text: + if ord(i) in keep_characters: + if len(convert) > 0: + result = result + unicodedata.normalize(form, convert) + convert = '' + result = result + i + else: + convert = convert + i + if len(convert) > 0: + result = result + unicodedata.normalize(form, convert) + return result + + +def revert_accent(document): + inverse_accent_map = {} + for k in accent_map: + inverse_accent_map[accent_map[k]] = k + inverse_special_accent_map = {} + for k in special_accent_map: + inverse_special_accent_map[special_accent_map[k]] = k + inverse_accented_map = {} + for k in accented_map: + inverse_accented_map[accented_map[k]] = k + + # Since LyX may insert a line break within a word we must combine all + # words before unicode normalization. + # We do this only if the next line starts with an accent, otherwise we + # would create things like '\begin_inset ERTstatus'. 
+ for i in range(len(document.body) - 1): + if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ': + continue + if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)): + # the last character of this line and the first of the next line + # form probably a surrogate pair, inline insets are excluded (second part of the test) + while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '): + document.body[i] += document.body[i+1][0] + document.body[i+1] = document.body[i+1][1:] + + # Normalize to "Normal form D" (NFD, also known as canonical decomposition). + # This is needed to catch all accented characters. + for i in range(len(document.body)): + # Unfortunately we have a mixture of unicode strings and plain strings, + # because we never use u'xxx' for string literals, but 'xxx'. + # Therefore we may have to try two times to normalize the data. + try: + document.body[i] = normalize("NFD", document.body[i]) + except TypeError: + document.body[i] = normalize("NFD", unicode(document.body[i], 'utf-8')) + + # Replace accented characters with InsetLaTeXAccent + # Do not convert characters that can be represented in the chosen + # encoding. 
+ encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)] + lang_re = re.compile(r"^\\lang\s(\S+)") + + i = 0 + while i < len(document.body): + if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '': + # Track the encoding of the current line + result = lang_re.match(document.body[i]) + if result: + language = result.group(1) + if language == "default": + encoding_stack[-1] = document.encoding + else: + from lyx2lyx_lang import lang + encoding_stack[-1] = lang[language][3] + continue + elif find_token(document.body, "\\begin_layout", i, i + 1) == i: + encoding_stack.append(encoding_stack[-1]) + continue + elif find_token(document.body, "\\end_layout", i, i + 1) == i: + del encoding_stack[-1] + continue + + for j in range(len(document.body[i])): + # dotless i and dotless j are both in special_accent_map and can + # occur as an accented character, so we need to test that the + # following character is no accent + if (document.body[i][j] in inverse_special_accent_map and + (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)): + accent = document.body[i][j] + try: + dummy = accent.encode(encoding_stack[-1]) + except UnicodeEncodeError: + # Insert the rest of the line as new line + if j < len(document.body[i]) - 1: + document.body.insert(i+1, document.body[i][j+1:]) + # Delete the accented character + document.body[i] = document.body[i][:j] + # Finally add the InsetLaTeXAccent + document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent] + break + elif j > 0 and document.body[i][j] in inverse_accent_map: + accented_char = document.body[i][j-1] + if accented_char == ' ': + # Conform to LyX output + accented_char = '' + elif accented_char in inverse_accented_map: + accented_char = inverse_accented_map[accented_char] + accent = document.body[i][j] + try: + dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1]) + 
except UnicodeEncodeError: + # Insert the rest of the line as new line + if j < len(document.body[i]) - 1: + document.body.insert(i+1, document.body[i][j+1:]) + # Delete the accented characters + document.body[i] = document.body[i][:j-1] + # Finally add the InsetLaTeXAccent + document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char) + break + i = i + 1 + + # Normalize to "Normal form C" (NFC, pre-composed characters) again + for i in range(len(document.body)): + document.body[i] = normalize("NFC", document.body[i]) + + +def normalize_font_whitespace_259(document): + """ Before format 259 the font changes were ignored if a + whitespace was the first or last character in the sequence, this function + transfers the whitespace outside.""" + + char_properties = {"\\series": "default", + "\\emph": "default", + "\\color": "none", + "\\shape": "default", + "\\bar": "default", + "\\family": "default"} + return normalize_font_whitespace(document, char_properties) + +def normalize_font_whitespace_274(document): + """ Before format 259 (sic) the font changes were ignored if a + whitespace was the first or last character in the sequence. This was + corrected for most font properties in format 259, but the language + was forgotten then. This function applies the same conversion done + there (namely, transfers the whitespace outside) for font language + changes, as well.""" + + char_properties = {"\\lang": "default"} + return normalize_font_whitespace(document, char_properties) + +def get_paragraph_language(document, i): + """ Return the language of the paragraph in which line i of the document + body is. 
If the first thing in the paragraph is a \\lang command, that + is the paragraph's langauge; otherwise, the paragraph's language is the + document's language.""" + + lines = document.body + + first_nonempty_line = \ + find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1) + + words = lines[first_nonempty_line].split() + + if len(words) > 1 and words[0] == "\\lang": + return words[1] + else: + return document.language + +def normalize_font_whitespace(document, char_properties): + """ Before format 259 the font changes were ignored if a + whitespace was the first or last character in the sequence, this function + transfers the whitespace outside. Only a change in one of the properties + in the provided char_properties is handled by this function.""" + + if document.backend != "latex": + return + + lines = document.body + + changes = {} + + i = 0 + while i < len(lines): + words = lines[i].split() + + if len(words) > 0 and words[0] == "\\begin_layout": + # a new paragraph resets all font changes + changes.clear() + # also reset the default language to be the paragraph's language + if "\\lang" in char_properties.keys(): + char_properties["\\lang"] = \ + get_paragraph_language(document, i + 1) + + elif len(words) > 1 and words[0] in char_properties.keys(): + # we have a font change + if char_properties[words[0]] == words[1]: + # property gets reset + if words[0] in changes.keys(): + del changes[words[0]] + defaultproperty = True + else: + # property gets set + changes[words[0]] = words[1] + defaultproperty = False + + # We need to explicitly reset all changed properties if we find + # a space below, because LyX 1.4 would output the space after + # closing the previous change and before starting the new one, + # and closing a font change means to close all properties, not + # just the changed one. 
+ + if lines[i-1] and lines[i-1][-1] == " ": + lines[i-1] = lines[i-1][:-1] + # a space before the font change + added_lines = [" "] + for k in changes.keys(): + # exclude property k because that is already in lines[i] + if k != words[0]: + added_lines[1:1] = ["%s %s" % (k, changes[k])] + for k in changes.keys(): + # exclude property k because that must be added below anyway + if k != words[0]: + added_lines[0:0] = ["%s %s" % (k, char_properties[k])] + if defaultproperty: + # Property is reset in lines[i], so add the new stuff afterwards + lines[i+1:i+1] = added_lines + else: + # Reset property for the space + added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])] + lines[i:i] = added_lines + i = i + len(added_lines) + + elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty): + # a space after the font change + if (lines[i+1] == " " and lines[i+2]): + next_words = lines[i+2].split() + if len(next_words) > 0 and next_words[0] == words[0]: + # a single blank with a property different from the + # previous and the next line must not be changed + i = i + 2 + continue + lines[i+1] = lines[i+1][1:] + added_lines = [" "] + for k in changes.keys(): + # exclude property k because that is already in lines[i] + if k != words[0]: + added_lines[1:1] = ["%s %s" % (k, changes[k])] + for k in changes.keys(): + # exclude property k because that must be added below anyway + if k != words[0]: + added_lines[0:0] = ["%s %s" % (k, char_properties[k])] + # Reset property for the space + added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])] + lines[i:i] = added_lines + i = i + len(added_lines) + + i = i + 1 + + +def revert_utf8x(document): + " Set utf8x encoding to utf8. 
" + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + else: + inputenc = get_value(document.header, "\\inputencoding", i) + if inputenc == "utf8x": + document.header[i] = "\\inputencoding utf8" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + + +def revert_utf8plain(document): + " Set utf8plain encoding to utf8. " + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + else: + inputenc = get_value(document.header, "\\inputencoding", i) + if inputenc == "utf8-plain": + document.header[i] = "\\inputencoding utf8" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + + +def revert_beamer_alert(document): + " Revert beamer's \\alert inset back to ERT. " + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset CharStyle Alert", i) + if i == -1: + return + document.body[i] = "\\begin_inset ERT" + i = i + 1 + while 1: + if (document.body[i][:13] == "\\begin_layout"): + # Insert the \alert command + document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}' + break + i = i + 1 + + i = i + 1 + + +def revert_beamer_structure(document): + " Revert beamer's \\structure inset back to ERT. " + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset CharStyle Structure", i) + if i == -1: + return + document.body[i] = "\\begin_inset ERT" + i = i + 1 + while 1: + if (document.body[i][:13] == "\\begin_layout"): + document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}' + break + i = i + 1 + + i = i + 1 + + +def convert_changes(document): + " Switch output_changes off if tracking_changes is off. 
" + i = find_token(document.header, '\\tracking_changes', 0) + if i == -1: + document.warning("Malformed lyx document: Missing '\\tracking_changes'.") + return + j = find_token(document.header, '\\output_changes', 0) + if j == -1: + document.warning("Malformed lyx document: Missing '\\output_changes'.") + return + tracking_changes = get_value(document.header, "\\tracking_changes", i) + output_changes = get_value(document.header, "\\output_changes", j) + if tracking_changes == "false" and output_changes == "true": + document.header[j] = "\\output_changes false" + + +def revert_ascii(document): + " Set ascii encoding to auto. " + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + else: + inputenc = get_value(document.header, "\\inputencoding", i) + if inputenc == "ascii": + document.header[i] = "\\inputencoding auto" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + + +def normalize_language_name(document): + lang = { "brazil": "brazilian", + "portuges": "portuguese"} + + if document.language in lang: + document.language = lang[document.language] + i = find_token(document.header, "\\language", 0) + document.header[i] = "\\language %s" % document.language + + +def revert_language_name(document): + lang = { "brazilian": "brazil", + "portuguese": "portuges"} + + if document.language in lang: + document.language = lang[document.language] + i = find_token(document.header, "\\language", 0) + document.header[i] = "\\language %s" % document.language + +# +# \textclass cv -> \textclass simplecv +def convert_cv_textclass(document): + if document.textclass == "cv": + document.textclass = "simplecv" + + +def revert_cv_textclass(document): + if document.textclass == "simplecv": + document.textclass = "cv" + + +# +# add scaleBeforeRotation graphics param +def convert_graphics_rotation(document): + " add scaleBeforeRotation graphics parameter. 
" + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset Graphics", i) + if i == -1: + return + j = find_end_of_inset(document.body, i+1) + if j == -1: + # should not happen + document.warning("Malformed LyX document: Could not find end of graphics inset.") + # Seach for rotateAngle and width or height or scale + # If these params are not there, nothing needs to be done. + k = find_token(document.body, "\trotateAngle", i + 1, j) + l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j) + if (k != -1 and l != -1): + document.body.insert(j, 'scaleBeforeRotation') + i = i + 1 + + +# +# remove scaleBeforeRotation graphics param +def revert_graphics_rotation(document): + " remove scaleBeforeRotation graphics parameter. " + i = 0 + while 1: + i = find_token(document.body, "\\begin_inset Graphics", i) + if i == -1: + return + j = find_end_of_inset(document.body, i + 1) + if j == -1: + # should not happen + document.warning("Malformed LyX document: Could not find end of graphics inset.") + # If there's a scaleBeforeRotation param, just remove that + k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j) + if k != -1: + del document.body[k] + else: + # if not, and if we have rotateAngle and width or height or scale, + # we have to put the rotateAngle value to special + rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j) + special = get_value(document.body, 'special', i + 1, j) + if rotateAngle != "": + k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j) + if k == -1: + break + if special == "": + document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle) + else: + l = find_token(document.body, "\tspecial", i + 1, j) + document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special)) + k = find_token(document.body, "\trotateAngle", i + 1, j) + if k != -1: + del document.body[k] + i = i + 1 + + + +def convert_tableborder(document): + # The problem is: LyX doubles the 
table cell border as it ignores the "|" character in + # the cell arguments. A fix takes care of this and therefore the "|" has to be removed + i = 0 + while i < len(document.body): + h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i])) + k = document.body[i].find("|>{", 0, len(document.body[i])) + # the two tokens have to be in one line + if (h != -1 and k != -1): + # delete the "|" + document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])] + i = i + 1 + + +def revert_tableborder(document): + i = 0 + while i < len(document.body): + h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i])) + k = document.body[i].find(">{", 0, len(document.body[i])) + # the two tokens have to be in one line + if (h != -1 and k != -1): + # add the "|" + document.body[i] = document.body[i][:k] + '|' + document.body[i][k:] + i = i + 1 + + +def revert_armenian(document): + + # set inputencoding from armscii8 to auto + if document.inputencoding == "armscii8": + i = find_token(document.header, "\\inputencoding", 0) + if i != -1: + document.header[i] = "\\inputencoding auto" + # check if preamble exists, if not k is set to -1 + i = 0 + k = -1 + while i < len(document.preamble): + if k == -1: + k = document.preamble[i].find("\\", 0, len(document.preamble[i])) + if k == -1: + k = document.preamble[i].find("%", 0, len(document.preamble[i])) + i = i + 1 + # add the entry \usepackage{armtex} to the document preamble + if document.language == "armenian": + # set the armtex entry as the first preamble line + if k != -1: + document.preamble[0:0] = ["\\usepackage{armtex}"] + # create the preamble when it doesn't exist + else: + document.preamble.append('\\usepackage{armtex}') + # Set document language from armenian to english + if document.language == "armenian": + document.language = "english" + i = find_token(document.header, "\\language", 0) + if i != -1: + document.header[i] = "\\language english" + + +def revert_CJK(document): + " 
Set CJK encodings to default and languages chinese, japanese and korean to english. " + encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS", + "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"] + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + else: + inputenc = get_value(document.header, "\\inputencoding", i) + if inputenc in encodings: + document.header[i] = "\\inputencoding default" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + + if document.language == "chinese-simplified" or \ + document.language == "chinese-traditional" or \ + document.language == "japanese" or document.language == "korean": + document.language = "english" + i = find_token(document.header, "\\language", 0) + if i != -1: + document.header[i] = "\\language english" + + +def revert_preamble_listings_params(document): + " Revert preamble option \listings_params " + i = find_token(document.header, "\\listings_params", 0) + if i != -1: + document.preamble.append('\\usepackage{listings}') + document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"')) + document.header.pop(i); + + +def revert_listings_inset(document): + r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate +FROM + +\begin_inset +lstparams "language=Delphi" +inline true +status open + +\begin_layout Standard +var i = 10; +\end_layout + +\end_inset + +TO + +\begin_inset ERT +status open +\begin_layout Standard + + +\backslash +lstinline[language=Delphi]{var i = 10;} +\end_layout + +\end_inset + +There can be an caption inset in this inset + +\begin_layout Standard +\begin_inset Caption + +\begin_layout Standard +before label +\begin_inset LatexCommand label +name "lst:caption" + +\end_inset + +after label +\end_layout + +\end_inset + + +\end_layout + +''' + i = 0 + while True: + i = find_token(document.body, '\\begin_inset listings', i) + if i == -1: + break + else: + if not 
'\\usepackage{listings}' in document.preamble: + document.preamble.append('\\usepackage{listings}') + j = find_end_of_inset(document.body, i + 1) + if j == -1: + # this should not happen + break + inline = 'false' + params = '' + status = 'open' + # first three lines + for line in range(i + 1, i + 4): + if document.body[line].startswith('inline'): + inline = document.body[line].split()[1] + if document.body[line].startswith('lstparams'): + params = document.body[line].split()[1].strip('"') + if document.body[line].startswith('status'): + status = document.body[line].split()[1].strip() + k = line + 1 + # caption? + caption = '' + label = '' + cap = find_token(document.body, '\\begin_inset Caption', i) + if cap != -1: + cap_end = find_end_of_inset(document.body, cap + 1) + if cap_end == -1: + # this should not happen + break + # label? + lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1) + if lbl != -1: + lbl_end = find_end_of_inset(document.body, lbl + 1) + if lbl_end == -1: + # this should not happen + break + else: + lbl = cap_end + lbl_end = cap_end + for line in document.body[lbl : lbl_end + 1]: + if line.startswith('name '): + label = line.split()[1].strip('"') + break + for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]: + if not line.startswith('\\'): + caption += line.strip() + k = cap_end + 1 + inlinecode = '' + # looking for the oneline code for lstinline + inlinecode = document.body[find_end_of_layout(document.body, + find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1] + if len(caption) > 0: + if len(params) == 0: + params = 'caption={%s}' % caption + else: + params += ',caption={%s}' % caption + if len(label) > 0: + if len(params) == 0: + params = 'label={%s}' % label + else: + params += ',label={%s}' % label + if len(params) > 0: + params = '[%s]' % params + params = params.replace('\\', '\\backslash\n') + if inline == 'true': + document.body[i:(j+1)] = 
[r'\begin_inset ERT', + 'status %s' % status, + r'\begin_layout %s' % document.default_layout, + '', + '', + r'\backslash', + 'lstinline%s{%s}' % (params, inlinecode), + r'\end_layout', + '', + r'\end_inset'] + else: + document.body[i: j+1] = [r'\begin_inset ERT', + 'status %s' % status, + '', + r'\begin_layout %s' % document.default_layout, + '', + '', + r'\backslash', + r'begin{lstlisting}%s' % params, + r'\end_layout', + '', + r'\begin_layout %s' % document.default_layout, + ] + document.body[k : j - 1] + \ + ['', + r'\begin_layout %s' % document.default_layout, + '', + r'\backslash', + 'end{lstlisting}', + r'\end_layout', + '', + r'\end_inset'] + + +def revert_include_listings(document): + r''' Revert lstinputlisting Include option , translate +\begin_inset Include \lstinputlisting{file}[opt] +preview false + +\end_inset + +TO + +\begin_inset ERT +status open + +\begin_layout Standard + + +\backslash +lstinputlisting{file}[opt] +\end_layout + +\end_inset + ''' + + i = 0 + while True: + i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i) + if i == -1: + break + else: + if not '\\usepackage{listings}' in document.preamble: + document.preamble.append('\\usepackage{listings}') + j = find_end_of_inset(document.body, i + 1) + if j == -1: + # this should not happen + break + # find command line lstinputlisting{file}[options] + cmd, file, option = '', '', '' + if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]): + cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups() + option = option.replace('\\', '\\backslash\n') + document.body[i : j + 1] = [r'\begin_inset ERT', + 'status open', + '', + r'\begin_layout %s' % document.default_layout, + '', + '', + r'\backslash', + '%s%s{%s}' % (cmd, option, file), + r'\end_layout', + '', + r'\end_inset'] + + +def revert_ext_font_sizes(document): + if document.backend != "latex": return + if not document.textclass.startswith("ext"): 
return + + fontsize = get_value(document.header, '\\paperfontsize', 0) + if fontsize not in ('10', '11', '12'): return + fontsize += 'pt' + + i = find_token(document.header, '\\paperfontsize', 0) + document.header[i] = '\\paperfontsize default' + + i = find_token(document.header, '\\options', 0) + if i == -1: + i = find_token(document.header, '\\textclass', 0) + 1 + document.header[i:i] = ['\\options %s' % fontsize] + else: + document.header[i] += ',%s' % fontsize + + +def convert_ext_font_sizes(document): + if document.backend != "latex": return + if not document.textclass.startswith("ext"): return + + fontsize = get_value(document.header, '\\paperfontsize', 0) + if fontsize != 'default': return + + i = find_token(document.header, '\\options', 0) + if i == -1: return + + options = get_value(document.header, '\\options', i) + + fontsizes = '10pt', '11pt', '12pt' + for fs in fontsizes: + if options.find(fs) != -1: + break + else: # this else will only be attained if the for cycle had no match + return + + options = options.split(',') + for j, opt in enumerate(options): + if opt in fontsizes: + fontsize = opt[:-2] + del options[j] + break + else: + return + + k = find_token(document.header, '\\paperfontsize', 0) + document.header[k] = '\\paperfontsize %s' % fontsize + + if options: + document.header[i] = '\\options %s' % ','.join(options) + else: + del document.header[i] + + +def revert_separator_layout(document): + r'''Revert --Separator-- to a lyx note +From + +\begin_layout --Separator-- +something +\end_layout + +to + +\begin_layout Standard +\begin_inset Note Note +status open + +\begin_layout Standard +Separate Evironment +\end_layout + +\end_inset +something + +\end_layout + + ''' + + i = 0 + while True: + i = find_token(document.body, r'\begin_layout --Separator--', i) + if i == -1: + break + j = find_end_of_layout(document.body, i + 1) + if j == -1: + # this should not happen + break + document.body[i : j + 1] = [r'\begin_layout %s' % 
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

The layout actually written is document.default_layout.
    '''

    i = 0
    while True:
        i = find_token(document.body, r'\begin_layout --Separator--', i)
        if i == -1:
            break
        j = find_end_of_layout(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # Lines between \begin_layout and \end_layout are kept after the note.
        content = document.body[i + 1 : j]
        document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                    r'\begin_inset Note Note',
                                    'status open',
                                    '',
                                    r'\begin_layout %s' % document.default_layout,
                                    'Separate Environment',
                                    r'\end_layout',
                                    '',
                                    r'\end_inset'] + \
                                    content + \
                                    ['',
                                     r'\end_layout'
                                    ]


def convert_arabic (document):
    """Rename the language "arabic" to "arabic_arabtex".

    The document language and the "\\language" header line are updated when
    the document language is "arabic".  Independently of that, every body
    line that contains "\\lang arabic" is replaced as a whole by
    "\\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    for i, line in enumerate(document.body):
        if "\\lang arabic" in line:
            # change the language name
            document.body[i] = "\\lang arabic_arabtex"


def revert_arabic (document):
    """Rename the language "arabic_arabtex" back to "arabic".

    The document language and the "\\language" header line are updated when
    the document language is "arabic_arabtex".  Independently of that,
    every body line that contains "\\lang arabic_arabtex" is replaced as a
    whole by "\\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    for i, line in enumerate(document.body):
        if "\\lang arabic_arabtex" in line:
            # change the language name
            document.body[i] = "\\lang arabic"
##
# Conversion hub
#

supported_versions = ["1.5.0","1.5"]

# Each entry pairs a file format number with the functions registered for it.
# NOTE(review): presumably the lyx2lyx driver runs the listed functions, in
# order, to reach that format - confirm against the caller.
convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
    [256, []],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
    [259, [convert_accent, normalize_font_whitespace_259]],
    [260, []],
    [261, [convert_changes]],
    [262, []],
    [263, [normalize_language_name]],
    [264, [convert_cv_textclass]],
    [265, [convert_tableborder]],
    [266, []],
    [267, []],
    [268, []],
    [269, []],
    [270, []],
    [271, [convert_ext_font_sizes]],
    [272, []],
    [273, []],
    [274, [normalize_font_whitespace_274]],
    [275, [convert_graphics_rotation]],
    [276, [convert_arabic]],
]

# Same layout as above, listed from the newest format down to the oldest.
revert = [
    [275, [revert_arabic]],
    [274, [revert_graphics_rotation]],
    [273, []],
    [272, [revert_separator_layout]],
    [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
    [270, [revert_ext_font_sizes]],
    [269, [revert_beamer_alert, revert_beamer_structure]],
    [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
    [267, [revert_CJK]],
    [266, [revert_utf8plain]],
    [265, [revert_armenian]],
    [264, [revert_tableborder]],
    [263, [revert_cv_textclass]],
    [262, [revert_language_name]],
    [261, [revert_ascii]],
    [260, []],
    [259, [revert_utf8x]],
    [258, []],
    [257, []],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_accent, revert_utf8, revert_unicode]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]


if __name__ == "__main__":
    pass