X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2FLyX.py;h=3d62da5d4b786985d3ca8c8b523ac1190d4e1ead;hb=e9cbd69f6ef3a41260f1ed4a7f66b48acdcd52a8;hp=04dc3e7a90a5b4d7212b4de42fe2c682cba1fd96;hpb=1fc3d051b18eda099668daffa6b6c4a42c7ff8c8;p=lyx.git diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py index 04dc3e7a90..3d62da5d4b 100644 --- a/lib/lyx2lyx/LyX.py +++ b/lib/lyx2lyx/LyX.py @@ -1,6 +1,6 @@ # This file is part of lyx2lyx # -*- coding: utf-8 -*- -# Copyright (C) 2002-2015 The LyX Team +# Copyright (C) 2002-2018 The LyX Team # Copyright (C) 2002-2004 Dekel Tsur # Copyright (C) 2002-2006 José Matos # @@ -20,23 +20,31 @@ " The LyX module has all the rules related with different lyx file formats." -from parser_tools import get_value, check_token, find_token, \ - find_tokens, find_end_of +from parser_tools import (get_value, check_token, find_token, find_tokens, + find_end_of, find_complete_lines) import os.path import gzip import locale import sys import re import time +import io +import codecs try: import lyx2lyx_version version__ = lyx2lyx_version.version + stable_version = True except: # we are running from build directory so assume the last version - version__ = '2.2' + version__ = '2.4' + stable_version = False default_debug__ = 2 +# Provide support for both python 2 and 3 +PY2 = sys.version_info[0] == 2 +# End of code to support for both python 2 and 3 + #################################################################### # Private helper functions @@ -63,8 +71,8 @@ def minor_versions(major, last_minor_version): # Regular expressions used format_re = re.compile(r"(\d)[\.,]?(\d\d)") fileformat = re.compile(r"\\lyxformat\s*(\S*)") -original_version = re.compile(r".*?LyX ([\d.]*)") -original_tex2lyx_version = re.compile(r".*?tex2lyx ([\d.]*)") +original_version = re.compile(b".*?LyX ([\\d.]*)") +original_tex2lyx_version = re.compile(b".*?tex2lyx ([\\d.]*)") ## # file format information: @@ -80,12 +88,15 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)), ("1_1_6_3", [218], ["1.1", "1.1.6.3","1.1.6.4"]), ("1_2", [220], minor_versions("1.2" , 4)), ("1_3", [221], minor_versions("1.3" , 7)), + # Note that range(i,j) is up to j *excluded*. ("1_4", list(range(222,246)), minor_versions("1.4" , 5)), ("1_5", list(range(246,277)), minor_versions("1.5" , 7)), ("1_6", list(range(277,346)), minor_versions("1.6" , 10)), ("2_0", list(range(346,414)), minor_versions("2.0" , 8)), - ("2_1", list(range(414,475)), minor_versions("2.1" , 0)), - ("2_2", list(range(475,502)), minor_versions("2.2" , 0)) + ("2_1", list(range(414,475)), minor_versions("2.1" , 5)), + ("2_2", list(range(475,509)), minor_versions("2.2" , 4)), + ("2_3", list(range(509,545)), minor_versions("2.3" , 0)), + ("2_4", (), minor_versions("2.4" , 0)) ] #################################################################### @@ -111,19 +122,29 @@ def formats_list(): def format_info(): - " Returns a list with supported file formats." - out = """Major version: - minor versions - formats + " Returns a list with the supported file formats." + template = """ +%s\tstable format: %s + \tstable versions: %s + \tdevelopment formats: %s """ + + out = "version: formats and versions" for version in format_relation: major = str(version[2][0]) versions = str(version[2][1:]) if len(version[1]) == 1: formats = str(version[1][0]) + stable_format = str(version[1][0]) + elif not stable_version and major == version__: + stable_format = "-- not yet --" + versions = "-- not yet --" + formats = "%s - %s" % (version[1][0], version[1][-1]) else: - formats = "%s - %s" % (version[1][-1], version[1][0]) - out += "%s\n\t%s\n\t%s\n\n" % (major, versions, formats) + formats = "%s - %s" % (version[1][0], version[1][-2]) + stable_format = str(version[1][-1]) + + out += template % (major, stable_format, versions, formats) return out + '\n' @@ -156,6 +177,17 @@ def trim_eol(line): return line[:-1] +def trim_eol_binary(line): + " Remove end of line char(s)." + if line[-1] != 10 and line[-1] != 13: + # May happen for the last line of a document + return line + if line[-2:-1] == 13: + return line[:-2] + else: + return line[:-1] + + def get_encoding(language, inputencoding, format, cjk_encoding): " Returns enconding of the lyx file" if format > 248: @@ -186,10 +218,10 @@ def get_encoding(language, inputencoding, format, cjk_encoding): class LyX_base: """This class carries all the information of the LyX file.""" - def __init__(self, end_format = 0, input = "", output = "", error = "", - debug = default_debug__, try_hard = 0, cjk_encoding = '', - final_version = "", systemlyxdir = '', language = "english", - encoding = "auto"): + def __init__(self, end_format = 0, input = u'', output = u'', error = u'', + debug = default_debug__, try_hard = 0, cjk_encoding = u'', + final_version = u'', systemlyxdir = u'', language = u'english', + encoding = u'auto'): """Arguments: end_format: final format that the file should be converted. (integer) @@ -198,7 +230,8 @@ class LyX_base: error: the name of the error file, if empty use the standard error. debug: debug level, O means no debug, as its value increases be more verbose. """ - self.choose_io(input, output) + self.choose_input(input) + self.output = output if error: self.err = open(error, "w") @@ -278,23 +311,50 @@ class LyX_base: """Reads a file into the self.header and self.body parts, from self.input.""" + # First pass: Read header to determine file encoding + # If we are running under python3 then all strings are binary in this + # pass. In some cases we need to convert binary to unicode in order to + # use our parser tools. Since we do not know the true encoding yet we + # use latin1. This works since a) the parts we are interested in are + # pure ASCII (subset of latin1) and b) in contrast to pure ascii or + # utf8, one can decode any 8byte string using latin1. + first_line = True while True: line = self.input.readline() if not line: - self.error("Invalid LyX file.") + # eof found before end of header + self.error("Invalid LyX file: Missing body.") + + if first_line: + # Remove UTF8 BOM marker if present + if line.startswith(codecs.BOM_UTF8): + line = line[len(codecs.BOM_UTF8):] + + first_line = False - line = trim_eol(line) - if check_token(line, '\\begin_preamble'): - while 1: + if PY2: + line = trim_eol(line) + decoded = line + else: + line = trim_eol_binary(line) + decoded = line.decode('latin1') + if check_token(decoded, '\\begin_preamble'): + while True: line = self.input.readline() if not line: - self.error("Invalid LyX file.") + # eof found before end of header + self.error("Invalid LyX file: Missing body.") - line = trim_eol(line) - if check_token(line, '\\end_preamble'): + if PY2: + line = trim_eol(line) + decoded = line + else: + line = trim_eol_binary(line) + decoded = line.decode('latin1') + if check_token(decoded, '\\end_preamble'): break - if line.split()[:0] in ("\\layout", + if decoded.split()[:0] in ("\\layout", "\\begin_layout", "\\begin_body"): self.warning("Malformed LyX file:" @@ -304,33 +364,49 @@ class LyX_base: self.preamble.append(line) - if check_token(line, '\\end_preamble'): + if check_token(decoded, '\\end_preamble'): continue - line = line.strip() + line = line.rstrip() if not line: continue - if line.split()[0] in ("\\layout", "\\begin_layout", + if decoded.split()[0] in ("\\layout", "\\begin_layout", "\\begin_body", "\\begin_deeper"): self.body.append(line) break self.header.append(line) - i = find_token(self.header, '\\textclass', 0) + if PY2: + i = find_token(self.header, '\\textclass', 0) + else: + i = find_token(self.header, b'\\textclass', 0) if i == -1: self.warning("Malformed LyX file: Missing '\\textclass'.") - i = find_token(self.header, '\\lyxformat', 0) + 1 - self.header[i:i] = ['\\textclass article'] - - self.textclass = get_value(self.header, "\\textclass", 0) - self.backend = get_backend(self.textclass) - self.format = self.read_format() - self.language = get_value(self.header, "\\language", 0, - default = "english") - self.inputencoding = get_value(self.header, "\\inputencoding", - 0, default = "auto") + if PY2: + i = find_token(self.header, '\\lyxformat', 0) + 1 + self.header[i:i] = ['\\textclass article'] + else: + i = find_token(self.header, b'\\lyxformat', 0) + 1 + self.header[i:i] = [b'\\textclass article'] + + if PY2: + self.textclass = get_value(self.header, "\\textclass", 0, + default = "") + self.language = get_value(self.header, "\\language", 0, + default = "english") + self.inputencoding = get_value(self.header, "\\inputencoding", 0, + default = "auto") + else: + self.textclass = get_value(self.header, b"\\textclass", 0, + default = b"") + self.language = get_value(self.header, b"\\language", 0, + default = b"english").decode('ascii') + self.inputencoding = get_value(self.header, b"\\inputencoding", 0, + default = b"auto").decode('ascii') + self.format = self.read_format() + self.initial_format = self.format self.encoding = get_encoding(self.language, self.inputencoding, self.format, self.cjk_encoding) @@ -339,13 +415,16 @@ class LyX_base: # Second pass over header and preamble, now we know the file encoding # Do not forget the textclass (Debian bug #700828) self.textclass = self.textclass.decode(self.encoding) + self.backend = get_backend(self.textclass) for i in range(len(self.header)): self.header[i] = self.header[i].decode(self.encoding) for i in range(len(self.preamble)): self.preamble[i] = self.preamble[i].decode(self.encoding) + for i in range(len(self.body)): + self.body[i] = self.body[i].decode(self.encoding) # Read document body - while 1: + while True: line = self.input.readline().decode(self.encoding) if not line: break @@ -354,6 +433,7 @@ class LyX_base: def write(self): " Writes the LyX file to self.output." + self.choose_output(self.output) self.set_version() self.set_format() self.set_textclass() @@ -367,30 +447,54 @@ class LyX_base: else: header = self.header - for line in header + [''] + self.body: - self.output.write(line.encode(self.encoding)+"\n") + for line in header + [u''] + self.body: + self.output.write(line+u'\n') - def choose_io(self, input, output): - """Choose input and output streams, dealing transparently with + def choose_output(self, output): + """Choose output streams dealing transparently with compressed files.""" - if output: - self.output = open(output, "wb") + # This is a bit complicated, because we need to be compatible both with + # python 2 and python 3. Therefore we handle the encoding here and not + # when writing individual lines and may need up to 3 layered file like + # interfaces. + if self.compressed: + if output: + outputfileobj = open(output, 'wb') + else: + # We cannot not use stdout directly since it needs text, not bytes in python 3 + outputfileobj = os.fdopen(sys.stdout.fileno(), 'wb') + # We cannot not use gzip.open() since it is not supported by python 2 + zipbuffer = gzip.GzipFile(mode='wb', fileobj=outputfileobj) + # We do not want to use different newlines on different OSes inside zipped files + self.output = io.TextIOWrapper(zipbuffer, encoding=self.encoding, newline='\n') else: - self.output = sys.stdout + if output: + self.output = io.open(output, 'w', encoding=self.encoding) + else: + self.output = io.open(sys.stdout.fileno(), 'w', encoding=self.encoding) + - if input and input != '-': + def choose_input(self, input): + """Choose input stream, dealing transparently with + compressed files.""" + + # Since we do not know the encoding yet we need to read the input as + # bytes in binary mode, and convert later to unicode. + if input and input != u'-': self.dir = os.path.dirname(os.path.abspath(input)) try: gzip.open(input).readline() self.input = gzip.open(input) - self.output = gzip.GzipFile(mode="wb", fileobj=self.output) + self.compressed = True except: - self.input = open(input) + self.input = open(input, 'rb') + self.compressed = False else: - self.dir = '' - self.input = sys.stdin + self.dir = u'' + self.input = os.fdopen(sys.stdin.fileno(), 'rb') + self.compressed = False def lyxformat(self, format): @@ -415,10 +519,10 @@ class LyX_base: file, returns the most likely value, or None otherwise.""" for line in self.header: - if line[0] != "#": + if line[0:1] != b"#": return None - line = line.replace("fix",".") + line = line.replace(b"fix",b".") # need to test original_tex2lyx_version first because tex2lyx # writes "#LyX file created by tex2lyx 2.2" result = original_tex2lyx_version.match(line) @@ -426,14 +530,14 @@ class LyX_base: result = original_version.match(line) if result: # Special know cases: reLyX and KLyX - if line.find("reLyX") != -1 or line.find("KLyX") != -1: + if line.find(b"reLyX") != -1 or line.find(b"KLyX") != -1: return "0.12" if result: res = result.group(1) if not res: self.warning(line) #self.warning("Version %s" % result.group(1)) - return res + return res.decode('ascii') if not PY2 else res self.warning(str(self.header[:2])) return None @@ -442,7 +546,7 @@ class LyX_base: " Set the header with the version used." initial_comment = " ".join(["#LyX %s created this file." % version__, - "For more info see http://www.lyx.org/"]) + "For more info see https://www.lyx.org/"]) # Simple heuristic to determine the comment that always starts # a lyx file @@ -463,11 +567,14 @@ class LyX_base: def read_format(self): " Read from the header the fileformat of the present LyX file." for line in self.header: - result = fileformat.match(line) + if PY2: + result = fileformat.match(line) + else: + result = fileformat.match(line.decode('ascii')) if result: return self.lyxformat(result.group(1)) else: - self.error("Invalid LyX File.") + self.error("Invalid LyX File: Missing format.") return None @@ -488,6 +595,7 @@ class LyX_base: #Note that the module will be added at the END of the extant ones def add_module(self, module): + " Append module to the modules list." i = find_token(self.header, "\\begin_modules", 0) if i == -1: #No modules yet included @@ -508,7 +616,16 @@ class LyX_base: self.header.insert(j, module) + def del_module(self, module): + " Delete `module` from module list, return success." + modlist = self.get_module_list() + if module not in modlist: + return False + self.set_module_list([line for line in modlist if line != module]) + return True + def get_module_list(self): + " Return list of modules." i = find_token(self.header, "\\begin_modules", 0) if (i == -1): return [] @@ -517,23 +634,23 @@ class LyX_base: def set_module_list(self, mlist): - modbegin = find_token(self.header, "\\begin_modules", 0) - newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules'] - if (modbegin == -1): + i = find_token(self.header, "\\begin_modules", 0) + if (i == -1): #No modules yet included tclass = find_token(self.header, "\\textclass", 0) if tclass == -1: self.warning("Malformed LyX document: No \\textclass!!") return - modbegin = tclass + 1 - self.header[modbegin:modbegin] = newmodlist - return - modend = find_token(self.header, "\\end_modules", modbegin) - if modend == -1: - self.warning("(set_module_list)Malformed LyX document: No \\end_modules.") - return - newmodlist = ['\\begin_modules'] + mlist + ['\\end_modules'] - self.header[modbegin:modend + 1] = newmodlist + i = j = tclass + 1 + else: + j = find_token(self.header, "\\end_modules", i) + if j == -1: + self.warning("(set_module_list) Malformed LyX document: No \\end_modules.") + return + j += 1 + if mlist: + mlist = ['\\begin_modules'] + mlist + ['\\end_modules'] + self.header[i:j] = mlist def set_parameter(self, param, value): @@ -605,7 +722,6 @@ class LyX_base: conversion are taken. It returns a list of modules needed to convert the LyX file from self.format to self.end_format""" - self.start = self.format format = self.format correct_version = 0 @@ -639,7 +755,7 @@ class LyX_base: # Convertion mode, back or forth steps = [] - if (initial_step, self.start) < (final_step, self.end_format): + if (initial_step, self.initial_format) < (final_step, self.end_format): mode = "convert" full_steps = [] for step in format_relation: @@ -667,6 +783,53 @@ class LyX_base: return mode, steps + def append_local_layout(self, new_layout): + " Append `new_layout` to the local layouts." + # new_layout may be a string or a list of strings (lines) + try: + new_layout = new_layout.splitlines() + except AttributeError: + pass + i = find_token(self.header, "\\begin_local_layout", 0) + if i == -1: + k = find_token(self.header, "\\language", 0) + if k == -1: + # this should not happen + self.warning("Malformed LyX document! No \\language header found!") + return + self.header[k : k] = ["\\begin_local_layout", "\\end_local_layout"] + i = k + + j = find_end_of(self.header, i, "\\begin_local_layout", "\\end_local_layout") + if j == -1: + # this should not happen + self.warning("Malformed LyX document: Can't find end of local layout!") + return + + self.header[i+1 : i+1] = new_layout + + def del_local_layout(self, layout_def): + " Delete `layout_def` from local layouts, return success." + i = find_complete_lines(self.header, layout_def) + if i == -1: + return False + j = i+len(layout_def) + if (self.header[i-1] == "\\begin_local_layout" and + self.header[j] == "\\end_local_layout"): + i -=1 + j +=1 + self.header[i:j] = [] + return True + + def del_from_header(self, lines): + " Delete `lines` from the document header, return success." + i = find_complete_lines(self.header, lines) + if i == -1: + return False + j = i + len(lines) + self.header[i:j] = [] + return True + # Part of an unfinished attempt to make lyx2lyx gave a more # structured view of the document. # def get_toc(self, depth = 4): @@ -683,7 +846,7 @@ class LyX_base: # toc_par = [] # i = 0 -# while 1: +# while True: # i = find_tokens(self.body, sections, i) # if i == -1: # break @@ -733,9 +896,9 @@ class LyX_base: class File(LyX_base): " This class reads existing LyX files." - def __init__(self, end_format = 0, input = "", output = "", error = "", - debug = default_debug__, try_hard = 0, cjk_encoding = '', - final_version = '', systemlyxdir = ''): + def __init__(self, end_format = 0, input = u'', output = u'', error = u'', + debug = default_debug__, try_hard = 0, cjk_encoding = u'', + final_version = u'', systemlyxdir = u''): LyX_base.__init__(self, end_format, input, output, error, debug, try_hard, cjk_encoding, final_version, systemlyxdir)