# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
-# Copyright (C) 2002-2011 The LyX Team
+# Copyright (C) 2002-2018 The LyX Team
# Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>
# Copyright (C) 2002-2006 José Matos <jamatos@lyx.org>
#
" The LyX module has all the rules related with different lyx file formats."
-from parser_tools import get_value, check_token, find_token, \
- find_tokens, find_end_of
+from parser_tools import (get_value, check_token, find_token, find_tokens,
+ find_end_of, find_complete_lines)
import os.path
import gzip
import locale
import sys
import re
import time
+import io
+import codecs
# Determine the lyx2lyx version.  An installed lyx2lyx ships a generated
# lyx2lyx_version module; when it cannot be imported we fall back to the
# current development version.
try:
    import lyx2lyx_version
    version__ = lyx2lyx_version.version
    stable_version = True
except (ImportError, AttributeError):
    # We are running from the build directory (no usable lyx2lyx_version
    # module), so assume the last version.  Only import-related failures
    # are handled here so that KeyboardInterrupt/SystemExit still propagate.
    version__ = '2.4'
    stable_version = False

# Debug level used as the default value of the `debug`/`debug_level` arguments
default_debug__ = 2

# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
# End of code to support for both python 2 and 3

####################################################################
# Private helper functions

# Regular expressions used
# "<digit>[. or ,]<two digits>", e.g. "2.21" -> ("2", "21")
format_re = re.compile(r"(\d)[\.,]?(\d\d)")
# Captures the token following "\lyxformat" at the start of a line
fileformat = re.compile(r"\\lyxformat\s*(\S*)")
# The two patterns below are bytes patterns: they are matched against
# header lines that have not been decoded yet.
original_version = re.compile(b".*?LyX ([\\d.]*)")
original_tex2lyx_version = re.compile(b".*?tex2lyx ([\\d.]*)")
##
# file format information:
("1_1_6_3", [218], ["1.1", "1.1.6.3","1.1.6.4"]),
("1_2", [220], minor_versions("1.2" , 4)),
("1_3", [221], minor_versions("1.3" , 7)),
- ("1_4", range(222,246), minor_versions("1.4" , 5)),
- ("1_5", range(246,277), minor_versions("1.5" , 7)),
- ("1_6", range(277,346), minor_versions("1.6" , 10)),
- ("2_0", range(347,414), minor_versions("2.0", 0)),
- ("2_1", [], minor_versions("2.1", 0))
+ # Note that range(i,j) is up to j *excluded*.
+ ("1_4", list(range(222,246)), minor_versions("1.4" , 5)),
+ ("1_5", list(range(246,277)), minor_versions("1.5" , 7)),
+ ("1_6", list(range(277,346)), minor_versions("1.6" , 10)),
+ ("2_0", list(range(346,414)), minor_versions("2.0" , 8)),
+ ("2_1", list(range(414,475)), minor_versions("2.1" , 5)),
+ ("2_2", list(range(475,509)), minor_versions("2.2" , 4)),
+ ("2_3", list(range(509,545)), minor_versions("2.3" , 0)),
+ ("2_4", (), minor_versions("2.4" , 0))
]
####################################################################
def format_info():
    """Return a printable overview of the supported file formats.

    Every entry of the module-level ``format_relation`` list is read as
    ``(name, formats, versions)`` where ``formats`` is a sequence of file
    format numbers and ``versions[0]`` is the major version string followed
    by the released (minor) versions.

    For each entry the report lists the stable format (the last format of
    the entry), the released versions and the range of development formats.
    For the version currently under development (``stable_version`` is
    false and the major version equals ``version__``) the stable format and
    versions are reported as "-- not yet --".  An entry without any format
    (a new version with no format change yet) is reported the same way
    instead of raising IndexError.
    """
    template = """
%s\tstable format: %s
 \tstable versions: %s
 \tdevelopment formats: %s
"""
    not_yet = "-- not yet --"

    out = "version: formats and versions"
    for version in format_relation:
        known_formats = version[1]
        major = str(version[2][0])
        versions = str(version[2][1:])
        in_development = not stable_version and major == version__

        if not known_formats:
            # Nothing to index into: no file format belongs to this
            # version yet.
            stable_format = not_yet
            formats = not_yet
            if in_development:
                versions = not_yet
        elif len(known_formats) == 1:
            formats = str(known_formats[0])
            stable_format = str(known_formats[0])
        elif in_development:
            stable_format = not_yet
            versions = not_yet
            formats = "%s - %s" % (known_formats[0], known_formats[-1])
        else:
            # The last format is the stable one, everything before it was
            # only used during development.
            formats = "%s - %s" % (known_formats[0], known_formats[-2])
            stable_format = str(known_formats[-1])

        out += template % (major, stable_format, versions, formats)
    return out + '\n'
def trim_eol(line):
    """Remove the end of line char(s) from the text string `line`.

    A trailing "\\n" or "\\r" is removed, together with a "\\r" directly
    in front of it.  A line without a line ending (this may happen for the
    last line of a document) and the empty string are returned unchanged.
    """
    # Slices instead of indexing, so an empty string does not raise.
    if line[-1:] not in ('\n', '\r'):
        # May happen for the last line of a document
        return line
    if line[-2:-1] == '\r':
        return line[:-2]
    return line[:-1]
def trim_eol_binary(line):
    """Remove the end of line char(s) from the bytes string `line`.

    Binary counterpart of trim_eol(): a trailing b"\\n" or b"\\r" is
    removed, together with a b"\\r" directly in front of it.  A line without
    a line ending (this may happen for the last line of a document) and the
    empty bytes string are returned unchanged.
    """
    # Always compare slices with bytes literals: indexing bytes yields an
    # int under python 3 while slicing yields bytes, so a slice never
    # compares equal to an int such as 13.
    if line[-1:] not in (b'\n', b'\r'):
        # May happen for the last line of a document
        return line
    if line[-2:-1] == b'\r':
        return line[:-2]
    return line[:-1]
+
+
def get_encoding(language, inputencoding, format, cjk_encoding):
" Returns enconding of the lyx file"
if format > 248:
class LyX_base:
"""This class carries all the information of the LyX file."""
- def __init__(self, end_format = 0, input = "", output = "", error = "",
- debug = default_debug__, try_hard = 0, cjk_encoding = '',
- final_version = "", language = "english", encoding = "auto"):
+ def __init__(self, end_format = 0, input = u'', output = u'', error = u'',
+ debug = default_debug__, try_hard = 0, cjk_encoding = u'',
+ final_version = u'', systemlyxdir = u'', language = u'english',
+ encoding = u'auto'):
"""Arguments:
end_format: final format that the file should be converted. (integer)
error: the name of the error file, if empty use the standard error.
debug: debug level, O means no debug, as its value increases be more verbose.
"""
- self.choose_io(input, output)
+ self.choose_input(input)
+ self.output = output
if error:
self.err = open(error, "w")
self.status = 0
self.encoding = encoding
self.language = language
+ self.systemlyxdir = systemlyxdir
def warning(self, message, debug_level= default_debug__):
"""Reads a file into the self.header and
self.body parts, from self.input."""
+ # First pass: Read header to determine file encoding
+ # If we are running under python3 then all strings are binary in this
+ # pass. In some cases we need to convert binary to unicode in order to
+ # use our parser tools. Since we do not know the true encoding yet we
+ # use latin1. This works since a) the parts we are interested in are
+ # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
+ # utf8, one can decode any 8byte string using latin1.
+ first_line = True
while True:
line = self.input.readline()
if not line:
- self.error("Invalid LyX file.")
+ # eof found before end of header
+ self.error("Invalid LyX file: Missing body.")
+
+ if first_line:
+ # Remove UTF8 BOM marker if present
+ if line.startswith(codecs.BOM_UTF8):
+ line = line[len(codecs.BOM_UTF8):]
+
+ first_line = False
- line = trim_eol(line)
- if check_token(line, '\\begin_preamble'):
- while 1:
+ if PY2:
+ line = trim_eol(line)
+ decoded = line
+ else:
+ line = trim_eol_binary(line)
+ decoded = line.decode('latin1')
+ if check_token(decoded, '\\begin_preamble'):
+ while True:
line = self.input.readline()
if not line:
- self.error("Invalid LyX file.")
+ # eof found before end of header
+ self.error("Invalid LyX file: Missing body.")
- line = trim_eol(line)
- if check_token(line, '\\end_preamble'):
+ if PY2:
+ line = trim_eol(line)
+ decoded = line
+ else:
+ line = trim_eol_binary(line)
+ decoded = line.decode('latin1')
+ if check_token(decoded, '\\end_preamble'):
break
- if line.split()[:0] in ("\\layout",
+ if decoded.split()[:0] in ("\\layout",
"\\begin_layout", "\\begin_body"):
self.warning("Malformed LyX file:"
self.preamble.append(line)
- if check_token(line, '\\end_preamble'):
+ if check_token(decoded, '\\end_preamble'):
continue
- line = line.strip()
+ line = line.rstrip()
if not line:
continue
- if line.split()[0] in ("\\layout", "\\begin_layout",
+ if decoded.split()[0] in ("\\layout", "\\begin_layout",
"\\begin_body", "\\begin_deeper"):
self.body.append(line)
break
self.header.append(line)
- i = find_token(self.header, '\\textclass', 0)
+ if PY2:
+ i = find_token(self.header, '\\textclass', 0)
+ else:
+ i = find_token(self.header, b'\\textclass', 0)
if i == -1:
self.warning("Malformed LyX file: Missing '\\textclass'.")
- i = find_token(self.header, '\\lyxformat', 0) + 1
- self.header[i:i] = ['\\textclass article']
-
- self.textclass = get_value(self.header, "\\textclass", 0)
- self.backend = get_backend(self.textclass)
- self.format = self.read_format()
- self.language = get_value(self.header, "\\language", 0,
- default = "english")
- self.inputencoding = get_value(self.header, "\\inputencoding",
- 0, default = "auto")
+ if PY2:
+ i = find_token(self.header, '\\lyxformat', 0) + 1
+ self.header[i:i] = ['\\textclass article']
+ else:
+ i = find_token(self.header, b'\\lyxformat', 0) + 1
+ self.header[i:i] = [b'\\textclass article']
+
+ if PY2:
+ self.textclass = get_value(self.header, "\\textclass", 0,
+ default = "")
+ self.language = get_value(self.header, "\\language", 0,
+ default = "english")
+ self.inputencoding = get_value(self.header, "\\inputencoding", 0,
+ default = "auto")
+ else:
+ self.textclass = get_value(self.header, b"\\textclass", 0,
+ default = b"")
+ self.language = get_value(self.header, b"\\language", 0,
+ default = b"english").decode('ascii')
+ self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
+ default = b"auto").decode('ascii')
+ self.format = self.read_format()
+ self.initial_format = self.format
self.encoding = get_encoding(self.language,
self.inputencoding, self.format,
self.cjk_encoding)
# Second pass over header and preamble, now we know the file encoding
# Do not forget the textclass (Debian bug #700828)
self.textclass = self.textclass.decode(self.encoding)
+ self.backend = get_backend(self.textclass)
for i in range(len(self.header)):
self.header[i] = self.header[i].decode(self.encoding)
for i in range(len(self.preamble)):
self.preamble[i] = self.preamble[i].decode(self.encoding)
+ for i in range(len(self.body)):
+ self.body[i] = self.body[i].decode(self.encoding)
# Read document body
- while 1:
+ while True:
line = self.input.readline().decode(self.encoding)
if not line:
break
def write(self):
" Writes the LyX file to self.output."
+ self.choose_output(self.output)
self.set_version()
self.set_format()
self.set_textclass()
else:
header = self.header
- for line in header + [''] + self.body:
- self.output.write(line.encode(self.encoding)+"\n")
+ for line in header + [u''] + self.body:
+ self.output.write(line+u'\n')
- def choose_io(self, input, output):
- """Choose input and output streams, dealing transparently with
def choose_output(self, output):
    """Open the output stream and store it in self.output.

    `output` is the name of the output file; if it is empty the standard
    output is used.  When self.compressed is set the text is gzipped.
    """
    # Encoding is handled here rather than when writing individual lines,
    # to stay compatible with both python 2 and python 3.  This may need
    # up to three layered file like interfaces.
    if not self.compressed:
        destination = output if output else sys.stdout.fileno()
        self.output = io.open(destination, 'w', encoding=self.encoding)
        return

    if output:
        raw_stream = open(output, 'wb')
    else:
        # sys.stdout cannot be used directly: it expects text, not bytes,
        # in python 3
        raw_stream = os.fdopen(sys.stdout.fileno(), 'wb')
    # gzip.open() cannot be used here since python 2 does not support it
    gzip_stream = gzip.GzipFile(mode='wb', fileobj=raw_stream)
    # Inside zipped files the newlines must not depend on the OS
    self.output = io.TextIOWrapper(gzip_stream, encoding=self.encoding,
                                   newline='\n')
+
- if input and input != '-':
def choose_input(self, input):
    """Choose the input stream, dealing transparently with compressed files.

    `input` is the name of the input file; if it is empty or u'-' the
    standard input is read.  Sets self.input (a stream opened in binary
    mode), self.dir (absolute directory of the input file, u'' for the
    standard input) and self.compressed.
    """
    # Since we do not know the encoding yet we need to read the input as
    # bytes in binary mode, and convert later to unicode.
    if input and input != u'-':
        self.dir = os.path.dirname(os.path.abspath(input))
        try:
            # Reading one line tells whether the file really is gzipped.
            # The probe is closed again so no file handle is leaked.
            probe = gzip.open(input)
            try:
                probe.readline()
            finally:
                probe.close()
            self.input = gzip.open(input)
            self.compressed = True
        except Exception:
            # Deliberately broad: any failure to read the file as gzip
            # means it is treated as a plain file.  KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.input = open(input, 'rb')
            self.compressed = False
    else:
        self.dir = u''
        self.input = os.fdopen(sys.stdin.fileno(), 'rb')
        self.compressed = False
def lyxformat(self, format):
file, returns the most likely value, or None otherwise."""
for line in self.header:
- if line[0] != "#":
+ if line[0:1] != b"#":
return None
- line = line.replace("fix",".")
- result = original_version.match(line)
+ line = line.replace(b"fix",b".")
+ # need to test original_tex2lyx_version first because tex2lyx
+ # writes "#LyX file created by tex2lyx 2.2"
+ result = original_tex2lyx_version.match(line)
+ if not result:
+ result = original_version.match(line)
+ if result:
+ # Special know cases: reLyX and KLyX
+ if line.find(b"reLyX") != -1 or line.find(b"KLyX") != -1:
+ return "0.12"
if result:
- # Special know cases: reLyX and KLyX
- if line.find("reLyX") != -1 or line.find("KLyX") != -1:
- return "0.12"
-
res = result.group(1)
if not res:
self.warning(line)
#self.warning("Version %s" % result.group(1))
- return res
+ return res.decode('ascii') if not PY2 else res
self.warning(str(self.header[:2]))
return None
def set_version(self):
    """Set the header comment naming the lyx2lyx version used.

    The first line of self.header is replaced by the comment if it is a
    comment itself, otherwise the comment is inserted in front of it.  A
    second comment line, if present, is removed.  Empty and one-line
    headers are handled without raising IndexError.
    """
    initial_comment = " ".join(["#LyX %s created this file." % version__,
                                "For more info see https://www.lyx.org/"])

    # Simple heuristic to determine the comment that always starts
    # a lyx file
    if self.header and self.header[0].startswith("#"):
        self.header[0] = initial_comment
    else:
        self.header.insert(0, initial_comment)

    # Old lyx files had a two lines comment header:
    # 1) the first line had the user who had created it
    # 2) the second line had the lyx version used
    # later we decided that 1) was a privacy risk for no gain
    # here we remove the second line effectively erasing 1)
    if len(self.header) > 1 and self.header[1].startswith('#'):
        del self.header[1]
def read_format(self):
    """Read the file format of the present LyX file from the header.

    Returns the result of self.lyxformat() for the first header line that
    matches `fileformat`.  If no line matches, self.error() is called and
    None is returned.
    """
    for line in self.header:
        if PY2:
            text = line
        else:
            # Under python 3 the header lines are handled as bytes here.
            # latin1 can decode any byte string, whereas 'ascii' would
            # raise UnicodeDecodeError on a non-ASCII line that precedes
            # the format line; the part we match is pure ASCII anyway.
            text = line.decode('latin1')
        result = fileformat.match(text)
        if result:
            return self.lyxformat(result.group(1))
    self.error("Invalid LyX File: Missing format.")
    return None
#Note that the module will be added at the END of the extant ones
def add_module(self, module):
+ " Append module to the modules list."
i = find_token(self.header, "\\begin_modules", 0)
if i == -1:
#No modules yet included
self.header.insert(j, module)
def del_module(self, module):
    """Remove `module` from the module list.

    Returns True if the module was listed (the list is then rewritten
    without it) and False if it was not, in which case nothing changes.
    """
    current = self.get_module_list()
    remaining = [name for name in current if name != module]
    if len(remaining) == len(current):
        # nothing was filtered out, so the module is not in the list
        return False
    self.set_module_list(remaining)
    return True
+
def get_module_list(self):
+ " Return list of modules."
i = find_token(self.header, "\\begin_modules", 0)
if (i == -1):
return []
def set_module_list(self, mlist):
    """Replace the module list in the header by `mlist`.

    An existing \\begin_modules ... \\end_modules section is replaced;
    without one the new section is placed directly after the \\textclass
    line.  If `mlist` is empty no section is written (an existing one is
    removed).  A warning is issued and the header is left unchanged when
    \\textclass or the closing \\end_modules cannot be found.
    """
    start = find_token(self.header, "\\begin_modules", 0)
    if start != -1:
        stop = find_token(self.header, "\\end_modules", start)
        if stop == -1:
            self.warning("(set_module_list) Malformed LyX document: No \\end_modules.")
            return
        # the slice to replace includes the \end_modules line
        stop += 1
    else:
        # No modules yet included
        textclass_line = find_token(self.header, "\\textclass", 0)
        if textclass_line == -1:
            self.warning("Malformed LyX document: No \\textclass!!")
            return
        # empty slice: pure insertion after \textclass
        start = stop = textclass_line + 1

    if mlist:
        mlist = ['\\begin_modules'] + mlist + ['\\end_modules']
    self.header[start:stop] = mlist
def set_parameter(self, param, value):
def convert(self):
"Convert from current (self.format) to self.end_format."
+ if self.format == self.end_format:
+ self.warning("No conversion needed: Target format %s "
+ "same as current format!" % self.format, default_debug__)
+ return
+
mode, conversion_chain = self.chain()
self.warning("conversion chain: " + str(conversion_chain), 3)
conversion are taken. It returns a list of modules needed to
convert the LyX file from self.format to self.end_format"""
- self.start = self.format
format = self.format
correct_version = 0
# Convertion mode, back or forth
steps = []
- if (initial_step, self.start) < (final_step, self.end_format):
+ if (initial_step, self.initial_format) < (final_step, self.end_format):
mode = "convert"
full_steps = []
for step in format_relation:
return mode, steps
def append_local_layout(self, new_layout):
    """Add `new_layout` to the local layout section of the header.

    `new_layout` may be a string (it is split into lines) or a list of
    lines.  A missing \\begin_local_layout ... \\end_local_layout section
    is created in front of the \\language line.  The new lines are
    inserted directly after the \\begin_local_layout line.  A warning is
    issued and nothing is added if \\language or the end of the section
    cannot be found.
    """
    # strings offer splitlines(), lists of lines are taken as they are
    split = getattr(new_layout, "splitlines", None)
    if split is not None:
        new_layout = split()

    begin = find_token(self.header, "\\begin_local_layout", 0)
    if begin == -1:
        begin = find_token(self.header, "\\language", 0)
        if begin == -1:
            # this should not happen
            self.warning("Malformed LyX document! No \\language header found!")
            return
        self.header[begin:begin] = ["\\begin_local_layout",
                                    "\\end_local_layout"]

    end = find_end_of(self.header, begin,
                      "\\begin_local_layout", "\\end_local_layout")
    if end == -1:
        # this should not happen
        self.warning("Malformed LyX document: Can't find end of local layout!")
        return

    first_content_line = begin + 1
    self.header[first_content_line:first_content_line] = new_layout
+
def del_local_layout(self, layout_def):
    """Delete `layout_def` from the local layouts, return success.

    `layout_def` may be a string (it is split into lines) or a list of
    lines.  If the lines are found in self.header they are removed; when
    they were the complete content of the local layout section, the
    enclosing \\begin_local_layout and \\end_local_layout lines are
    removed as well.  Returns False if the lines are not found.
    """
    # Accept a multi-line string as append_local_layout() does; otherwise
    # len() below would count characters instead of lines.
    try:
        layout_def = layout_def.splitlines()
    except AttributeError:
        pass
    i = find_complete_lines(self.header, layout_def)
    if i == -1:
        return False
    j = i + len(layout_def)
    # Look at the neighbouring lines only if they exist: i - 1 must not
    # wrap around to the last line and j must not point past the header.
    if (i > 0 and j < len(self.header)
            and self.header[i-1] == "\\begin_local_layout"
            and self.header[j] == "\\end_local_layout"):
        i -= 1
        j += 1
    self.header[i:j] = []
    return True
+
def del_from_header(self, lines):
    """Remove the consecutive `lines` from the document header.

    Returns True if the lines were found (and removed), False otherwise.
    """
    first = find_complete_lines(self.header, lines)
    found = first != -1
    if found:
        del self.header[first:first + len(lines)]
    return found
+
# Part of an unfinished attempt to make lyx2lyx give a more
# structured view of the document.
# def get_toc(self, depth = 4):
# toc_par = []
# i = 0
-# while 1:
+# while True:
# i = find_tokens(self.body, sections, i)
# if i == -1:
# break
class File(LyX_base):
    " Reader for existing LyX files: the file is read on construction."

    def __init__(self, end_format = 0, input = u'', output = u'', error = u'',
                 debug = default_debug__, try_hard = 0, cjk_encoding = u'',
                 final_version = u'', systemlyxdir = u''):
        # Hand every argument on to the base class by name, then read the
        # input right away.
        LyX_base.__init__(self,
                          end_format = end_format,
                          input = input,
                          output = output,
                          error = error,
                          debug = debug,
                          try_hard = try_hard,
                          cjk_encoding = cjk_encoding,
                          final_version = final_version,
                          systemlyxdir = systemlyxdir)
        self.read()
+# FIXME: header settings are completely outdated, don't use like this
#class NewFile(LyX_base):
# " This class is to create new LyX files."
# def set_header(self, **params):
# "\\use_amsmath 1",
# "\\cite_engine basic",
# "\\use_bibtopic false",
+# "\\use_indices false",
# "\\paperorientation portrait",
# "\\secnumdepth 3",
# "\\tocdepth 3",