X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx_1_5.py;h=4b0fd5cfbbe55ca083e020f787782482c8b35c94;hb=28f76b84c96323e4dad43140121aae01820b4c38;hp=70ec731ee691e5c256fcc7e0aad415769d91a3c2;hpb=06139fb170c5c0e3c1bb87feed81ee5cc72a595e;p=lyx.git diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py index 70ec731ee6..4b0fd5cfbb 100644 --- a/lib/lyx2lyx/lyx_1_5.py +++ b/lib/lyx2lyx/lyx_1_5.py @@ -15,22 +15,42 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ Convert files to the file format generated by lyx 1.5""" import re -from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value +import unicodedata +import sys, os + +from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line +from lyx2lyx_tools import insert_document_option from LyX import get_encoding +# Provide support for both python 2 and 3 +PY2 = sys.version_info[0] == 2 +if not PY2: + text_type = str + unichr = chr +else: + text_type = unicode +# End of code to support for both python 2 and 3 #################################################################### # Private helper functions def find_end_of_inset(lines, i): - " Find beginning of inset, where lines[i] is included." + " Find end of inset, where lines[i] is included." return find_end_of(lines, i, "\\begin_inset", "\\end_inset") +def find_end_of_layout(lines, i): + " Find end of layout, where lines[i] is included." + return find_end_of(lines, i, "\\begin_layout", "\\end_layout") + +def find_beginning_of_layout(lines, i): + "Find beginning of layout, where lines[i] is included." 
+ return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout") + # End of helper functions #################################################################### @@ -42,7 +62,7 @@ def find_end_of_inset(lines, i): def revert_framed(document): "Revert framed notes. " i = 0 - while 1: + while True: i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i) if i == -1: @@ -82,7 +102,7 @@ def convert_font_settings(document): if font_scheme == '': document.warning("Malformed LyX document: Empty `\\fontscheme'.") font_scheme = 'default' - if not font_scheme in roman_fonts.keys(): + if not font_scheme in list(roman_fonts.keys()): document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme) font_scheme = 'default' document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme], @@ -152,7 +172,7 @@ def revert_font_settings(document): del document.header[i] if font_tt_scale != '100': document.warning("Conversion of '\\font_tt_scale' not yet implemented.") - for font_scheme in roman_fonts.keys(): + for font_scheme in list(roman_fonts.keys()): if (roman_fonts[font_scheme] == fonts['roman'] and sans_fonts[font_scheme] == fonts['sans'] and typewriter_fonts[font_scheme] == fonts['typewriter']): @@ -197,7 +217,7 @@ def revert_booktabs(document): re_bspace = re.compile(r'\s+bottomspace="[^"]+"') re_ispace = re.compile(r'\s+interlinespace="[^"]+"') i = 0 - while 1: + while True: i = find_token(document.body, "\\begin_inset Tabular", i) if i == -1: return @@ -217,31 +237,248 @@ def revert_booktabs(document): i = i + 1 +def convert_multiencoding(document, forward): + """ Fix files with multiple encodings. +Files with an inputencoding of "auto" or "default" and multiple languages +where at least two languages have different default encodings are encoded +in multiple encodings for file formats < 249. These files are incorrectly +read and written (as if the whole file was in the encoding of the main +language). 
+This is not true for files written by CJK-LyX, they are always in the locale +encoding. + +This function +- converts from fake unicode values to true unicode if forward is true, and +- converts from true unicode values to fake unicode if forward is false. +document.encoding must be set to the old value (format 248) in both cases. + +We do this here and not in LyX.py because it is far easier to do the +necessary parsing in modern formats than in ancient ones. +""" + inset_types = ["Foot", "Note"] + if document.cjk_encoding != '': + return + encoding_stack = [document.encoding] + insets = [] + lang_re = re.compile(r"^\\lang\s(\S+)") + inset_re = re.compile(r"^\\begin_inset\s(\S+)") + if not forward: # no need to read file unless we are reverting + spec_chars = read_unicodesymbols() + + if document.inputencoding == "auto" or document.inputencoding == "default": + i = 0 + while i < len(document.body): + result = lang_re.match(document.body[i]) + if result: + language = result.group(1) + if language == "default": + document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3) + encoding_stack[-1] = document.encoding + else: + from lyx2lyx_lang import lang + document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3) + encoding_stack[-1] = lang[language][3] + elif find_token(document.body, "\\begin_layout", i, i + 1) == i: + document.warning("Adding nested encoding %s." % encoding_stack[-1], 3) + if len(insets) > 0 and insets[-1] in inset_types: + from lyx2lyx_lang import lang + encoding_stack.append(lang[document.language][3]) + else: + encoding_stack.append(encoding_stack[-1]) + elif find_token(document.body, "\\end_layout", i, i + 1) == i: + document.warning("Removing nested encoding %s." 
% encoding_stack[-1], 3) + if len(encoding_stack) == 1: + # Don't remove the document encoding from the stack + document.warning("Malformed LyX document: Unexpected `\\end_layout'.") + else: + del encoding_stack[-1] + elif find_token(document.body, "\\begin_inset", i, i + 1) == i: + inset_result = inset_re.match(document.body[i]) + if inset_result: + insets.append(inset_result.group(1)) + else: + insets.append("") + elif find_token(document.body, "\\end_inset", i, i + 1) == i: + del insets[-1] + if encoding_stack[-1] != document.encoding: + if forward: + # This line has been incorrectly interpreted as if it was + # encoded in 'encoding'. + # Convert back to the 8bit string that was in the file. + orig = document.body[i].encode(document.encoding) + # Convert the 8bit string that was in the file to unicode + # with the correct encoding. + document.body[i] = orig.decode(encoding_stack[-1]) + else: + try: + # Convert unicode to the 8bit string that will be written + # to the file with the correct encoding. + orig = document.body[i].encode(encoding_stack[-1]) + # Convert the 8bit string that will be written to the + # file to fake unicode with the encoding that will later + # be used when writing to the file. + document.body[i] = orig.decode(document.encoding) + except: + mod_line = revert_unicode_line(document, i, insets, spec_chars) + document.body[i:i+1] = mod_line.split('\n') + i += len(mod_line.split('\n')) - 1 + i += 1 + + def convert_utf8(document): + " Set document encoding to UTF-8. " + convert_multiencoding(document, True) document.encoding = "utf8" def revert_utf8(document): + " Set document encoding to the value corresponding to inputencoding. 
" i = find_token(document.header, "\\inputencoding", 0) if i == -1: document.header.append("\\inputencoding auto") elif get_value(document.header, "\\inputencoding", i) == "utf8": document.header[i] = "\\inputencoding auto" document.inputencoding = get_value(document.header, "\\inputencoding", 0) - document.encoding = get_encoding(document.language, document.inputencoding, 248) + document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding) + convert_multiencoding(document, False) + + +# FIXME: Use the version in unicode_symbols.py which has some bug fixes +def read_unicodesymbols(): + " Read the unicodesymbols list of unicode characters and corresponding commands." + pathname = os.path.abspath(os.path.dirname(sys.argv[0])) + fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')) + spec_chars = {} + for line in fp.readlines(): + if line[0] != '#': + line=line.replace(' "',' ') # remove all quotation marks with spaces before + line=line.replace('" ',' ') # remove all quotation marks with spaces after + line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis) + try: + # flag1 and flag2 are preamble and other flags + [ucs4,command,flag1,flag2] =line.split(None,3) + spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2] + except: + pass + fp.close() + return spec_chars + + +def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'): + # Define strings to start and end ERT and math insets + ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout + ert_outro='\n\\end_layout\n\n\\end_inset\n' + math_intro='\n\\begin_inset Formula $' + math_outro='$\n\\end_inset' + + mod_line = u'' + if i and not is_inset_line(document, i-1): + last_char = document.body[i - 1][-1:] + else: + last_char = '' + + line = document.body[i] + for character in line: + try: + # Try to write the character + dummy = character.encode(document.encoding) + 
mod_line += character + last_char = character + except: + # Try to replace with ERT/math inset + if character in spec_chars: + command = spec_chars[character][0] # the command to replace unicode + flag1 = spec_chars[character][1] + flag2 = spec_chars[character][2] + if flag1.find('combining') > -1 or flag2.find('combining') > -1: + # We have a character that should be combined with the previous + command += '{' + last_char + '}' + # Remove the last character. Ignore if it is whitespace + if len(last_char.rstrip()): + # last_char was found and is not whitespace + if mod_line: + mod_line = mod_line[:-1] + else: # last_char belongs to the last line + document.body[i-1] = document.body[i-1][:-1] + else: + # The last character was replaced by a command. For now it is + # ignored. This could be handled better. + pass + if command[0:2] == '\\\\': + if command[2:12]=='ensuremath': + if insets and insets[-1] == "ERT": + # math in ERT + command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n') + command = command.replace('}', '$\n') + elif not insets or insets[-1] != "Formula": + # add a math inset with the replacement character + command = command.replace('\\\\ensuremath{\\', math_intro) + command = command.replace('}', math_outro) + else: + # we are already in a math inset + command = command.replace('\\\\ensuremath{\\', '') + command = command.replace('}', '') + else: + if insets and insets[-1] == "Formula": + # avoid putting an ERT in a math; instead put command as text + command = command.replace('\\\\', r'\mathrm{') + command = command + '}' + elif not insets or insets[-1] != "ERT": + # add an ERT inset with the replacement character + command = command.replace('\\\\', '\n\\backslash\n') + command = ert_intro + command + ert_outro + else: + command = command.replace('\\\\', '\n\\backslash\n') + last_char = '' # indicate that the character should not be removed + mod_line += command + else: + # Replace with replacement string + mod_line += 
replacement_character + return mod_line + + +def revert_unicode(document): + '''Transform unicode characters that can not be written using the +document encoding to commands according to the unicodesymbols +file. Characters that can not be replaced by commands are replaced by +an replacement string. Flags other than 'combined' are currently not +implemented.''' + spec_chars = read_unicodesymbols() + insets = [] # list of active insets + + # Go through the document to capture all combining characters + i = 0 + while i < len(document.body): + line = document.body[i] + # Check for insets + if line.find('\\begin_inset') > -1: + insets.append(line[13:].split()[0]) + if line.find('\\end_inset') > -1: + del insets[-1] + + # Try to write the line + try: + # If all goes well the line is written here + dummy = line.encode(document.encoding) + i += 1 + except: + # Error, some character(s) in the line need to be replaced + mod_line = revert_unicode_line(document, i, insets, spec_chars) + document.body[i:i+1] = mod_line.split('\n') + i += len(mod_line.split('\n')) def revert_cs_label(document): " Remove status flag of charstyle label. " i = 0 - while 1: + while True: i = find_token(document.body, "\\begin_inset CharStyle", i) if i == -1: return # Seach for a line starting 'show_label' # If it is not there, break with a warning message i = i + 1 - while 1: + while True: if (document.body[i][:10] == "show_label"): del document.body[i] break @@ -254,7 +491,7 @@ def revert_cs_label(document): def convert_bibitem(document): - """ Convert + r""" Convert \bibitem [option]{argument} to @@ -267,19 +504,24 @@ key "argument" This must be called after convert_commandparams. 
""" - regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})') i = 0 - while 1: + while True: i = find_token(document.body, "\\bibitem", i) if i == -1: break - match = re.match(regex, document.body[i]) - option = match.group(1) - argument = match.group(2) + j = document.body[i].find('[') + 1 + k = document.body[i].rfind(']') + if j == 0: # No optional argument found + option = None + else: + option = document.body[i][j:k] + j = document.body[i].rfind('{') + 1 + k = document.body[i].rfind('}') + argument = document.body[i][j:k] lines = ['\\begin_inset LatexCommand bibitem'] if option != None: - lines.append('label "%s"' % option[1:-1].replace('"', '\\"')) - lines.append('key "%s"' % argument[1:-1].replace('"', '\\"')) + lines.append('label "%s"' % option.replace('"', '\\"')) + lines.append('key "%s"' % argument.replace('"', '\\"')) lines.append('') lines.append('\\end_inset') document.body[i:i+1] = lines @@ -334,27 +576,24 @@ commandparams_info = { def convert_commandparams(document): """ Convert - \begin_inset LatexCommand \cmdname[opt1][opt2]{arg} - \end_inset + \\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg} + \\end_inset to - \begin_inset LatexCommand cmdname + \\begin_inset LatexCommand cmdname name1 "opt1" name2 "opt2" name3 "arg" - \end_inset + \\end_inset name1, name2 and name3 can be different for each command. """ # \begin_inset LatexCommand bibitem was not the official version (see # convert_bibitem()), but could be read in, so we convert it here, too. - # FIXME: Handle things like \command[foo[bar]]{foo{bar}} - # we need a real parser here. 
- regex = re.compile(r'\\([^\[\{]+)(\[[^\[\{]*\])?(\[[^\[\{]*\])?(\{[^}]*\})?') i = 0 - while 1: + while True: i = find_token(document.body, "\\begin_inset LatexCommand", i) if i == -1: break @@ -364,6 +603,13 @@ def convert_commandparams(document): i = i + 1 continue + j = find_token(document.body, "\\end_inset", i + 1) + if j == -1: + document.warning("Malformed document") + else: + command += "".join(document.body[i+1:j]) + document.body[i+1:j] = [] + # The following parser is taken from the original InsetCommandParams::scanCommand name = "" option1 = "" @@ -372,8 +618,8 @@ def convert_commandparams(document): state = "WS" # Used to handle things like \command[foo[bar]]{foo{bar}} nestdepth = 0 - for j in range(len(command)): - c = command[j] + b = 0 + for c in command: if ((state == "CMDNAME" and c == ' ') or (state == "CMDNAME" and c == '[') or (state == "CMDNAME" and c == '{')): @@ -384,11 +630,11 @@ def convert_commandparams(document): if nestdepth == 0: state = "WS" else: - --nestdepth + nestdepth = nestdepth - 1 if ((state == "OPTION" and c == '[') or (state == "SECOPTION" and c == '[') or (state == "CONTENT" and c == '{')): - ++nestdepth + nestdepth = nestdepth + 1 if state == "CMDNAME": name += c elif state == "OPTION": @@ -398,10 +644,6 @@ def convert_commandparams(document): elif state == "CONTENT": argument += c elif state == "WS": - if j > 0: - b = command[j-1] - else: - b = 0 if c == '\\': state = "CMDNAME" elif c == '[' and b != ']': @@ -413,6 +655,7 @@ def convert_commandparams(document): elif c == '{': state = "CONTENT" nestdepth = 0 # Just to be sure + b = c # Now we have parsed the command, output the parameters lines = ["\\begin_inset LatexCommand %s" % name] @@ -420,17 +663,17 @@ def convert_commandparams(document): if commandparams_info[name][0] == "": document.warning("Ignoring invalid option `%s' of command `%s'." 
% (option1, name)) else: - lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"'))) + lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"'))) if option2 != "": if commandparams_info[name][1] == "": document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name)) else: - lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"'))) + lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"'))) if argument != "": if commandparams_info[name][2] == "": document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name)) else: - lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"'))) + lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"'))) document.body[i:i+1] = lines i = i + 1 @@ -438,12 +681,12 @@ def convert_commandparams(document): def revert_commandparams(document): regex = re.compile(r'(\S+)\s+(.+)') i = 0 - while 1: + while True: i = find_token(document.body, "\\begin_inset LatexCommand", i) if i == -1: break name = document.body[i].split()[2] - j = find_end_of_inset(document.body, i + 1) + j = find_end_of_inset(document.body, i) preview_line = "" option1 = "" option2 = "" @@ -457,13 +700,13 @@ def revert_commandparams(document): preview_line = document.body[k] elif (commandparams_info[name][0] != "" and pname == commandparams_info[name][0]): - option1 = pvalue.strip('"').replace('\\"', '"') + option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\') elif (commandparams_info[name][1] != "" and pname == commandparams_info[name][1]): - option2 = pvalue.strip('"').replace('\\"', '"') + option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\') elif (commandparams_info[name][2] != "" and pname == commandparams_info[name][2]): - argument = pvalue.strip('"').replace('\\"', 
'"') + argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\') elif document.body[k].strip() != "": document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name)) if name == "bibitem": @@ -488,7 +731,1300 @@ def revert_commandparams(document): lines.append('') lines.append('\\end_inset') document.body[i:j+1] = lines - i = j + 1 + i += len(lines) + 1 + + +def revert_nomenclature(document): + " Convert nomenclature entry to ERT. " + regex = re.compile(r'(\S+)\s+(.+)') + i = 0 + use_nomencl = 0 + while True: + i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i) + if i == -1: + break + use_nomencl = 1 + j = find_end_of_inset(document.body, i + 1) + preview_line = "" + symbol = "" + description = "" + prefix = "" + for k in range(i + 1, j): + match = re.match(regex, document.body[k]) + if match: + name = match.group(1) + value = match.group(2) + if name == "preview": + preview_line = document.body[k] + elif name == "symbol": + symbol = value.strip('"').replace('\\"', '"') + elif name == "description": + description = value.strip('"').replace('\\"', '"') + elif name == "prefix": + prefix = value.strip('"').replace('\\"', '"') + elif document.body[k].strip() != "": + document.warning("Ignoring unknown contents `%s' in nomenclature inset." 
% document.body[k]) + if prefix == "": + command = 'nomenclature{%s}{%s}' % (symbol, description) + else: + command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description) + document.body[i:j+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + command, + '\\end_layout', + '', + '\\end_inset'] + i = i + 11 + if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1: + document.preamble.append('\\usepackage{nomencl}[2005/09/22]') + document.preamble.append('\\makenomenclature') + + +def revert_printnomenclature(document): + " Convert printnomenclature to ERT. " + regex = re.compile(r'(\S+)\s+(.+)') + i = 0 + use_nomencl = 0 + while True: + i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i) + if i == -1: + break + use_nomencl = 1 + j = find_end_of_inset(document.body, i + 1) + preview_line = "" + labelwidth = "" + for k in range(i + 1, j): + match = re.match(regex, document.body[k]) + if match: + name = match.group(1) + value = match.group(2) + if name == "preview": + preview_line = document.body[k] + elif name == "labelwidth": + labelwidth = value.strip('"').replace('\\"', '"') + elif document.body[k].strip() != "": + document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k]) + if labelwidth == "": + command = 'nomenclature{}' + else: + command = 'nomenclature[%s]' % labelwidth + document.body[i:j+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + command, + '\\end_layout', + '', + '\\end_inset'] + i = i + 11 + if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1: + document.preamble.append('\\usepackage{nomencl}[2005/09/22]') + document.preamble.append('\\makenomenclature') + + +def convert_esint(document): + " Add \\use_esint setting to header. 
" + i = find_token(document.header, "\\cite_engine", 0) + if i == -1: + document.warning("Malformed LyX document: Missing `\\cite_engine'.") + return + # 0 is off, 1 is auto, 2 is on. + document.header.insert(i, '\\use_esint 0') + + +def revert_esint(document): + " Remove \\use_esint setting from header. " + i = find_token(document.header, "\\use_esint", 0) + if i == -1: + document.warning("Malformed LyX document: Missing `\\use_esint'.") + return + use_esint = document.header[i].split()[1] + del document.header[i] + # 0 is off, 1 is auto, 2 is on. + if (use_esint == 2): + document.preamble.append('\\usepackage{esint}') + + +def revert_clearpage(document): + " clearpage -> ERT " + i = 0 + while True: + i = find_token(document.body, "\\clearpage", i) + if i == -1: + break + document.body[i:i+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + 'clearpage', + '\\end_layout', + '', + '\\end_inset'] + i = i + 1 + + +def revert_cleardoublepage(document): + " cleardoublepage -> ERT " + i = 0 + while True: + i = find_token(document.body, "\\cleardoublepage", i) + if i == -1: + break + document.body[i:i+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + '\\backslash', + 'cleardoublepage', + '\\end_layout', + '', + '\\end_inset'] + i = i + 1 + + +def convert_lyxline(document): + r" remove fontsize commands for \lyxline " + # The problematic is: The old \lyxline definition doesn't handle the fontsize + # to change the line thickness. The new definiton does this so that imported + # \lyxlines would have a different line thickness. The eventual fontsize command + # before \lyxline is therefore removed to get the same output. 
+ fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize", + "large", "Large", "LARGE", "huge", "Huge"] + for n in range(0, len(fontsizes)): + i = 0 + k = 0 + while i < len(document.body): + i = find_token(document.body, "\\size " + fontsizes[n], i) + k = find_token(document.body, "\\lyxline", i) + # the corresponding fontsize command is always 2 lines before the \lyxline + if (i != -1 and k == i+2): + document.body[i:i+1] = [] + else: + break + i = i + 1 + + +def revert_encodings(document): + " Set new encodings to auto. " + encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852", + "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250", + "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"] + i = find_token(document.header, "\\inputencoding", 0) + if i == -1: + document.header.append("\\inputencoding auto") + else: + inputenc = get_value(document.header, "\\inputencoding", i) + if inputenc in encodings: + document.header[i] = "\\inputencoding auto" + document.inputencoding = get_value(document.header, "\\inputencoding", 0) + + +def convert_caption(document): + " Convert caption layouts to caption insets. " + i = 0 + while True: + i = find_token(document.body, "\\begin_layout Caption", i) + if i == -1: + return + j = find_end_of_layout(document.body, i) + if j == -1: + document.warning("Malformed LyX document: Missing `\\end_layout'.") + return + + document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""] + document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout, + "\\begin_inset Caption", "", + "\\begin_layout %s" % document.default_layout] + i = i + 1 + + +def revert_caption(document): + " Convert caption insets to caption layouts. " + " This assumes that the text class has a caption style. 
" + i = 0 + while True: + i = find_token(document.body, "\\begin_inset Caption", i) + if i == -1: + return + + # We either need to delete the previous \begin_layout line, or we + # need to end the previous layout if this inset is not in the first + # position of the paragraph. + layout_before = find_token_backwards(document.body, "\\begin_layout", i) + if layout_before == -1: + document.warning("Malformed LyX document: Missing `\\begin_layout'.") + return + layout_line = document.body[layout_before] + del_layout_before = True + l = layout_before + 1 + while l < i: + if document.body[l] != "": + del_layout_before = False + break + l = l + 1 + if del_layout_before: + del document.body[layout_before:i] + i = layout_before + else: + document.body[i:i] = ["\\end_layout", ""] + i = i + 2 + + # Find start of layout in the inset and end of inset + j = find_token(document.body, "\\begin_layout", i) + if j == -1: + document.warning("Malformed LyX document: Missing `\\begin_layout'.") + return + k = find_end_of_inset(document.body, i) + if k == -1: + document.warning("Malformed LyX document: Missing `\\end_inset'.") + return + + # We either need to delete the following \end_layout line, or we need + # to restart the old layout if this inset is not at the paragraph end. + layout_after = find_token(document.body, "\\end_layout", k) + if layout_after == -1: + document.warning("Malformed LyX document: Missing `\\end_layout'.") + return + del_layout_after = True + l = k + 1 + while l < layout_after: + if document.body[l] != "": + del_layout_after = False + break + l = l + 1 + if del_layout_after: + del document.body[k+1:layout_after+1] + else: + document.body[k+1:k+1] = [layout_line, ""] + + # delete \begin_layout and \end_inset and replace \begin_inset with + # "\begin_layout Caption". This works because we can only have one + # paragraph in the caption inset: The old \end_layout will be recycled. 
+ del document.body[k] + if document.body[k] == "": + del document.body[k] + del document.body[j] + if document.body[j] == "": + del document.body[j] + document.body[i] = "\\begin_layout Caption" + if document.body[i+1] == "": + del document.body[i+1] + i = i + 1 + + +# Accents of InsetLaTeXAccent +accent_map = { + "`" : u'\u0300', # grave + "'" : u'\u0301', # acute + "^" : u'\u0302', # circumflex + "~" : u'\u0303', # tilde + "=" : u'\u0304', # macron + "u" : u'\u0306', # breve + "." : u'\u0307', # dot above + "\"": u'\u0308', # diaeresis + "r" : u'\u030a', # ring above + "H" : u'\u030b', # double acute + "v" : u'\u030c', # caron + "b" : u'\u0320', # minus sign below + "d" : u'\u0323', # dot below + "c" : u'\u0327', # cedilla + "k" : u'\u0328', # ogonek + "t" : u'\u0361' # tie. This is special: It spans two characters, but + # only one is given as argument, so we don't need to + # treat it differently. +} + + +# special accents of InsetLaTeXAccent without argument +special_accent_map = { + 'i' : u'\u0131', # dotless i + 'j' : u'\u0237', # dotless j + 'l' : u'\u0142', # l with stroke + 'L' : u'\u0141' # L with stroke +} + + +# special accent arguments of InsetLaTeXAccent +accented_map = { + '\\i' : u'\u0131', # dotless i + '\\j' : u'\u0237' # dotless j +} + + +def _convert_accent(accent, accented_char): + type = accent + char = accented_char + if char == '': + if type in special_accent_map: + return special_accent_map[type] + # a missing char is treated as space by LyX + char = ' ' + elif type == 'q' and char in ['t', 'd', 'l', 'L']: + # Special caron, only used with t, d, l and L. + # It is not in the map because we convert it to the same unicode + # character as the normal caron: \q{} is only defined if babel with + # the czech or slovak language is used, and the normal caron + # produces the correct output if the T1 font encoding is used. + # For the same reason we never convert to \q{} in the other direction. 
+ type = 'v' + elif char in accented_map: + char = accented_map[char] + elif (len(char) > 1): + # We can only convert accents on a single char + return '' + a = accent_map.get(type) + if a: + return unicodedata.normalize("NFC", "%s%s" % (char, a)) + return '' + + +def convert_ertbackslash(body, i, ert, default_layout): + r""" ------------------------------------------------------------------------------------------- + Convert backslashes and '\n' into valid ERT code, append the converted + text to body[i] and return the (maybe incremented) line index i""" + + for c in ert: + if c == '\\': + body[i] = body[i] + '\\backslash ' + i = i + 1 + body.insert(i, '') + elif c == '\n': + body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, ''] + i = i + 4 + else: + body[i] = body[i] + c + return i + + +def convert_accent(document): + # The following forms are supported by LyX: + # '\i \"{a}' (standard form, as written by LyX) + # '\i \"{}' (standard form, as written by LyX if the accented char is a space) + # '\i \"{ }' (also accepted if the accented char is a space) + # '\i \" a' (also accepted) + # '\i \"' (also accepted) + re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$') + re_contents = re.compile(r'^([^\s{]+)(.*)$') + re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$') + i = 0 + while True: + i = find_re(document.body, re_wholeinset, i) + if i == -1: + return + match = re_wholeinset.match(document.body[i]) + prefix = match.group(1) + contents = match.group(3).strip() + match = re_contents.match(contents) + if match: + # Strip first char (always \) + accent = match.group(1)[1:] + accented_contents = match.group(2).strip() + match = re_accentedcontents.match(accented_contents) + accented_char = match.group(1) + converted = _convert_accent(accent, accented_char) + if converted == '': + # Normalize contents + contents = '%s{%s}' % (accent, accented_char), + else: + document.body[i] = '%s%s' % (prefix, converted) + i += 1 + continue + 
document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents) + document.body[i] = prefix + document.body[i+1:i+1] = ['\\begin_inset ERT', + 'status collapsed', + '', + '\\begin_layout %s' % document.default_layout, + '', + '', + ''] + i = convert_ertbackslash(document.body, i + 7, + '\\%s' % contents, + document.default_layout) + document.body[i+1:i+1] = ['\\end_layout', + '', + '\\end_inset'] + i += 3 + + +def is_inset_line(document, i): + """ Line i of body has an inset """ + if document.body[i][:1] == '\\': + return True + last_tokens = "".join(document.body[i].split()[-2:]) + return last_tokens.find('\\') != -1 + + +# A wrapper around normalize that handles special cases (cf. bug 3313) +def normalize(form, text): + # do not normalize OHM, ANGSTROM + keep_characters = [0x2126,0x212b] + result = '' + convert = '' + for i in text: + if ord(i) in keep_characters: + if len(convert) > 0: + result = result + unicodedata.normalize(form, convert) + convert = '' + result = result + i + else: + convert = convert + i + if len(convert) > 0: + result = result + unicodedata.normalize(form, convert) + return result + + +def revert_accent(document): + inverse_accent_map = {} + for k in accent_map: + inverse_accent_map[accent_map[k]] = k + inverse_special_accent_map = {} + for k in special_accent_map: + inverse_special_accent_map[special_accent_map[k]] = k + inverse_accented_map = {} + for k in accented_map: + inverse_accented_map[accented_map[k]] = k + + # Since LyX may insert a line break within a word we must combine all + # words before unicode normalization. + # We do this only if the next line starts with an accent, otherwise we + # would create things like '\begin_inset ERTstatus'. 
+ for i in range(len(document.body) - 1): + if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ': + continue + if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)): + # the last character of this line and the first of the next line + # form probably a surrogate pair, inline insets are excluded (second part of the test) + while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '): + document.body[i] += document.body[i+1][0] + document.body[i+1] = document.body[i+1][1:] + + # Normalize to "Normal form D" (NFD, also known as canonical decomposition). + # This is needed to catch all accented characters. + for i in range(len(document.body)): + # Unfortunately we have a mixture of unicode strings and plain strings, + # because we never use u'xxx' for string literals, but 'xxx'. + # Therefore we may have to try two times to normalize the data. + try: + document.body[i] = normalize("NFD", document.body[i]) + except TypeError: + document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8')) + + # Replace accented characters with InsetLaTeXAccent + # Do not convert characters that can be represented in the chosen + # encoding. 
+ encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)] + lang_re = re.compile(r"^\\lang\s(\S+)") + + i = 0 + while i < len(document.body): + if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '': + # Track the encoding of the current line + result = lang_re.match(document.body[i]) + if result: + language = result.group(1) + if language == "default": + encoding_stack[-1] = document.encoding + else: + from lyx2lyx_lang import lang + encoding_stack[-1] = lang[language][3] + continue + elif find_token(document.body, "\\begin_layout", i, i + 1) == i: + encoding_stack.append(encoding_stack[-1]) + continue + elif find_token(document.body, "\\end_layout", i, i + 1) == i: + del encoding_stack[-1] + continue + + for j in range(len(document.body[i])): + # dotless i and dotless j are both in special_accent_map and can + # occur as an accented character, so we need to test that the + # following character is no accent + if (document.body[i][j] in inverse_special_accent_map and + (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)): + accent = document.body[i][j] + try: + dummy = accent.encode(encoding_stack[-1]) + except UnicodeEncodeError: + # Insert the rest of the line as new line + if j < len(document.body[i]) - 1: + document.body.insert(i+1, document.body[i][j+1:]) + # Delete the accented character + document.body[i] = document.body[i][:j] + # Finally add the InsetLaTeXAccent + document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent] + break + elif j > 0 and document.body[i][j] in inverse_accent_map: + accented_char = document.body[i][j-1] + if accented_char == ' ': + # Conform to LyX output + accented_char = '' + elif accented_char in inverse_accented_map: + accented_char = inverse_accented_map[accented_char] + accent = document.body[i][j] + try: + dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1]) + 
def get_paragraph_language(document, i):
    """ Return the language of the paragraph that contains line i of the
    document body.

    The first non-empty line after the paragraph's \\begin_layout is
    inspected: if it is a \\lang command, its argument is the paragraph's
    language; otherwise the document's language is returned."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first_text = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_text].split()

    starts_with_lang = len(tokens) > 1 and tokens[0] == "\\lang"
    return tokens[1] if starts_with_lang else document.language
+ + if lines[i-1] and lines[i-1][-1] == " ": + lines[i-1] = lines[i-1][:-1] + # a space before the font change + added_lines = [" "] + for k in list(changes.keys()): + # exclude property k because that is already in lines[i] + if k != words[0]: + added_lines[1:1] = ["%s %s" % (k, changes[k])] + for k in list(changes.keys()): + # exclude property k because that must be added below anyway + if k != words[0]: + added_lines[0:0] = ["%s %s" % (k, char_properties[k])] + if defaultproperty: + # Property is reset in lines[i], so add the new stuff afterwards + lines[i+1:i+1] = added_lines + else: + # Reset property for the space + added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])] + lines[i:i] = added_lines + i = i + len(added_lines) + + elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty): + # a space after the font change + if (lines[i+1] == " " and lines[i+2]): + next_words = lines[i+2].split() + if len(next_words) > 0 and next_words[0] == words[0]: + # a single blank with a property different from the + # previous and the next line must not be changed + i = i + 2 + continue + lines[i+1] = lines[i+1][1:] + added_lines = [" "] + for k in list(changes.keys()): + # exclude property k because that is already in lines[i] + if k != words[0]: + added_lines[1:1] = ["%s %s" % (k, changes[k])] + for k in list(changes.keys()): + # exclude property k because that must be added below anyway + if k != words[0]: + added_lines[0:0] = ["%s %s" % (k, char_properties[k])] + # Reset property for the space + added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])] + lines[i:i] = added_lines + i = i + len(added_lines) + + i = i + 1 + + +def revert_utf8x(document): + " Set utf8x encoding to utf8. 
def revert_beamer_alert(document):
    """ Revert beamer's \\alert inset back to ERT.

    Every '\\begin_inset CharStyle Alert' line is turned into
    '\\begin_inset ERT', and the line that follows the next
    '\\begin_layout' is wrapped in '\\alert{...}'.

    If no '\\begin_layout' with a following line exists (truncated or
    malformed document) a warning is issued and the conversion stops,
    instead of running past the end of the body."""
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i += 1
        # Look for the layout that holds the inset contents; the line after
        # it must exist, so the scan stops one line before the end.
        while i < len(body) - 1 and body[i][:13] != "\\begin_layout":
            i += 1
        if i >= len(body) - 1:
            document.warning("Malformed LyX document: Could not find layout of Alert inset.")
            return
        # Insert the \alert command
        body[i + 1] = "\\alert{" + body[i + 1] + '}'
        i += 1
def normalize_language_name(document):
    """Rename the document languages 'brazil' and 'portuges' to
    'brazilian' and 'portuguese'.

    Updates document.language and, when present, the '\\language' line of
    the header.  Documents in any other language are left untouched.
    """
    renamed = {"brazil": "brazilian",
               "portuges": "portuguese"}

    if document.language not in renamed:
        return
    document.language = renamed[document.language]
    i = find_token(document.header, "\\language", 0)
    # find_token returns -1 when the header has no \language line; indexing
    # with it would silently overwrite the last header line.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
def revert_graphics_rotation(document):
    """ remove scaleBeforeRotation graphics parameter.

    For every Graphics inset:
      * if it has a scaleBeforeRotation line, that line is deleted;
      * otherwise, if it has a rotateAngle and one of width, height or
        scale, the angle is moved into the 'special' parameter (created if
        missing, prepended as 'angle=..,' otherwise) and the rotateAngle
        line is deleted.

    Insets that need no change, and insets whose end cannot be found, are
    skipped; the remaining insets of the document are still processed."""
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without an end index the searches below would use -1 as
            # their upper bound, so leave this inset alone.
            i += 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del body[k]
            i += 1
            continue
        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(body, 'rotateAngle', i + 1, j)
        special = get_value(body, 'special', i + 1, j)
        if rotateAngle == "":
            i += 1
            continue
        size = find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if size == -1:
            # Nothing to do for this inset, but later insets may still
            # need the conversion, so do not leave the loop here.
            i += 1
            continue
        if special == "":
            body.insert(j - 1, '\tspecial angle=%s' % rotateAngle)
        else:
            pos = find_token(body, "\tspecial", i + 1, j)
            if pos != -1:
                body[pos] = body[pos].replace(special, 'angle=%s,%s' % (rotateAngle, special))
        k = find_token(body, "\trotateAngle", i + 1, j)
        if k != -1:
            del body[k]
        i += 1
def revert_tableborder(document):
    """Put the "|" back in front of the first ">{" of every body line that
    also contains leftline="true".

    Lines that lack either of the two tokens are left unchanged."""
    lines = document.body
    for n, line in enumerate(lines):
        # the two tokens have to be in one line
        if 'leftline="true"' not in line:
            continue
        pos = line.find(">{")
        if pos == -1:
            continue
        # add the "|"
        lines[n] = "%s|%s" % (line[:pos], line[pos:])
def revert_preamble_listings_params(document):
    r""" Revert preamble option \listings_params

    If the header has a \listings_params line, its (optionally quoted)
    value is moved into the preamble as \lstset{...}, \usepackage{listings}
    is added to the preamble unless it is already there, and the header
    line is removed.  Nothing happens if the header has no such line."""
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Split off the token only: the value may contain blanks
    # (e.g. "language=C, numbers=left") and must be kept in one piece.
    fields = document.header[i].split(None, 1)
    params = ''
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    if '\\usepackage{listings}' not in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
'\\usepackage{listings}' in document.preamble: + document.preamble.append('\\usepackage{listings}') + j = find_end_of_inset(document.body, i + 1) + if j == -1: + # this should not happen + break + inline = 'false' + params = '' + status = 'open' + # first three lines + for line in range(i + 1, i + 4): + if document.body[line].startswith('inline'): + inline = document.body[line].split()[1] + if document.body[line].startswith('lstparams'): + params = document.body[line].split()[1].strip('"') + if document.body[line].startswith('status'): + status = document.body[line].split()[1].strip() + k = line + 1 + # caption? + caption = '' + label = '' + cap = find_token(document.body, '\\begin_inset Caption', i) + if cap != -1: + cap_end = find_end_of_inset(document.body, cap + 1) + if cap_end == -1: + # this should not happen + break + # label? + lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1) + if lbl != -1: + lbl_end = find_end_of_inset(document.body, lbl + 1) + if lbl_end == -1: + # this should not happen + break + else: + lbl = cap_end + lbl_end = cap_end + for line in document.body[lbl : lbl_end + 1]: + if line.startswith('name '): + label = line.split()[1].strip('"') + break + for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]: + if not line.startswith('\\'): + caption += line.strip() + k = cap_end + 1 + inlinecode = '' + # looking for the oneline code for lstinline + inlinecode = document.body[find_end_of_layout(document.body, + find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1] + if len(caption) > 0: + if len(params) == 0: + params = 'caption={%s}' % caption + else: + params += ',caption={%s}' % caption + if len(label) > 0: + if len(params) == 0: + params = 'label={%s}' % label + else: + params += ',label={%s}' % label + if len(params) > 0: + params = '[%s]' % params + params = params.replace('\\', '\\backslash\n') + if inline == 'true': + document.body[i:(j+1)] = 
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

The file name may contain any character except "}", and the option part
may contain blanks.  \usepackage{listings} is added to the preamble if it
is not there yet.  Processing stops at the first inset whose end cannot
be found.
    '''
    re_command = re.compile(r'\\(lstinputlisting){([^}]*)}(.*)')
    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]; everything after
        # the second word belongs to the command, blanks included
        cmd, filename, option = '', '', ''
        fields = document.body[i].rstrip().split(None, 2)
        match = None
        if len(fields) > 2:
            match = re_command.match(fields[2])
        if match:
            cmd, filename, option = match.groups()
        else:
            document.warning("Malformed LyX document: Could not parse `%s'." % document.body[i])
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout %s' % document.default_layout,
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
def convert_ext_font_sizes(document):
    """Turn a 10pt/11pt/12pt class option of an ext* class into the
    \\paperfontsize header setting.

    Only acts on documents with the latex backend whose text class name
    starts with "ext" and whose \\paperfontsize is 'default'.  The first
    class option that names one of the three sizes is removed from
    \\options (the whole \\options line is deleted if it was the only
    option) and \\paperfontsize is set to the number without 'pt'.
    """
    if document.backend != "latex":
        return
    if not document.textclass.startswith("ext"):
        return

    if get_value(document.header, '\\paperfontsize', 0) != 'default':
        return

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        return

    fontsizes = ('10pt', '11pt', '12pt')
    options = get_value(document.header, '\\options', i).split(',')
    for j, opt in enumerate(options):
        # Options may be written with blanks after the comma
        # ("a4paper, 11pt"), so compare the stripped option.
        if opt.strip() in fontsizes:
            fontsize = opt.strip()[:-2]
            del options[j]
            break
    else:  # no font size among the class options
        return

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    if options:
        document.header[i] = '\\options %s' % ','.join(options).strip()
    else:
        del document.header[i]
def convert_arabic(document):
    """Rename the language 'arabic' to 'arabic_arabtex'.

    The document language and its \\language header line are updated if
    the document language is 'arabic'.  Independently of that, every body
    line that contains '\\lang arabic' is replaced by
    '\\lang arabic_arabtex'."""
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language arabic_arabtex"

    body = document.body
    for n, line in enumerate(body):
        if r"\lang arabic" in line:
            # change the language name
            body[n] = r'\lang arabic_arabtex'
[revert_beamer_alert, revert_beamer_structure]], + [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]], + [267, [revert_CJK]], + [266, [revert_utf8plain]], + [265, [revert_armenian]], + [264, [revert_tableborder]], + [263, [revert_cv_textclass]], + [262, [revert_language_name]], + [261, [revert_ascii]], + [260, []], + [259, [revert_utf8x]], + [258, []], + [257, []], + [256, [revert_caption]], + [255, [revert_encodings]], + [254, [revert_clearpage, revert_cleardoublepage]], + [253, [revert_esint]], + [252, [revert_nomenclature, revert_printnomenclature]], + [251, [revert_commandparams]], [250, [revert_cs_label]], [249, []], - [248, [revert_utf8]], + [248, [revert_accent, revert_utf8, revert_unicode]], [247, [revert_booktabs]], [246, [revert_font_settings]], [245, [revert_framed]]] @@ -515,4 +2101,3 @@ revert = [[251, [revert_commandparams]], if __name__ == "__main__": pass -