find_tokens, find_end_of
import os.path
import gzip
+import locale
import sys
import re
import time
return line[:-1]
-def get_encoding(language, inputencoding, format):
+def get_encoding(language, inputencoding, format, cjk_encoding):
if format > 248:
return "utf8"
+ # CJK-LyX encodes files using the current locale encoding.
+ # This means that files created by CJK-LyX can only be converted with
+ # the correct locale settings, unless the encoding is given as a
+ # command-line argument.
+ if cjk_encoding == 'auto':
+ return locale.getpreferredencoding()
+ elif cjk_encoding != '':
+ return cjk_encoding
from lyx2lyx_lang import lang
if inputencoding == "auto" or inputencoding == "default":
return lang[language][3]
"""This class carries all the information of the LyX file."""
def __init__(self, end_format = 0, input = "", output = "", error
- = "", debug = default_debug_level, try_hard = 0, language = "english",
- encoding = "auto"):
-
+ = "", debug = default_debug_level, try_hard = 0, cjk_encoding = '',
+ language = "english", encoding = "auto"):
+
"""Arguments:
end_format: final format that the file should be converted. (integer)
input: the name of the input source, if empty resort to standard input.
self.debug = debug
self.try_hard = try_hard
+ self.cjk_encoding = cjk_encoding
if end_format:
self.end_format = self.lyxformat(end_format)
self.format = self.read_format()
self.language = get_value(self.header, "\\language", 0, default = "english")
self.inputencoding = get_value(self.header, "\\inputencoding", 0, default = "auto")
- self.encoding = get_encoding(self.language, self.inputencoding, self.format)
+ self.encoding = get_encoding(self.language, self.inputencoding, self.format, self.cjk_encoding)
self.initial_version = self.read_version()
# Second pass over header and preamble, now we know the file encoding
self.set_version()
self.set_format()
if self.encoding == "auto":
- self.encoding = get_encoding(self.language, self.encoding, self.format)
+ self.encoding = get_encoding(self.language, self.encoding, self.format, self.cjk_encoding)
if self.preamble:
i = find_token(self.header, '\\textclass', 0) + 1
class File(LyX_Base):
" This class reads existing LyX files."
- def __init__(self, end_format = 0, input = "", output = "", error = "", debug = default_debug_level, try_hard = 0):
- LyX_Base.__init__(self, end_format, input, output, error, debug, try_hard)
+ def __init__(self, end_format = 0, input = "", output = "", error = "", debug = default_debug_level, try_hard = 0, cjk_encoding = ''):
+ LyX_Base.__init__(self, end_format, input, output, error, debug, try_hard, cjk_encoding)
self.read()
-t, --to version final version (optional)
-o, --output name name of the output file or else goes to stdout
-n, --try-hard try hard (ignore any convertion errors)
+ -c, --cjk [encoding] files in format 248 and lower are read and
+ written in the format of CJK-LyX.
+ If encoding is not given or 'auto' the encoding
+ is determined from the locale.
-q, --quiet same as --debug=0"""
def parse_options(argv):
- _options = ["help", "version", "list", "debug=", "err=", "from=", "to=", "output=", "try-hard", "quiet"]
+ _options = ["help", "version", "list", "debug=", "err=", "from=", "to=", "output=", "try-hard", "cjk", "quiet"]
try:
- opts, args = getopt.getopt(argv[1:], "d:e:f:hlno:qt:v", _options)
+ opts, args = getopt.getopt(argv[1:], "c:d:e:f:hlno:qt:v", _options)
except getopt.error:
usage()
sys.exit(2)
end_format, input, output, error, debug, try_hard = 0, "", "", "", LyX.default_debug_level, 0
+ cjk_encoding = ''
for o, a in opts:
if o in ("-h", "--help"):
usage()
error = a
if o in ("-n", "--try-hard"):
try_hard = 1
+ if o in ("-c", "--cjk"):
+ if a == '':
+ cjk_encoding = 'auto'
+ else:
+ cjk_encoding = a
if args:
input = args[0]
- return end_format, input, output, error, debug, try_hard
+ return end_format, input, output, error, debug, try_hard, cjk_encoding
def main(argv):
- end_format, input, output, error, debug, try_hard = parse_options(argv)
- file = LyX.File(end_format, input, output, error, debug, try_hard)
+ end_format, input, output, error, debug, try_hard, cjk_encoding = parse_options(argv)
+ file = LyX.File(end_format, input, output, error, debug, try_hard, cjk_encoding)
file.convert()
file.write()
in multiple encodings for file formats < 249. These files are incorrectly
read and written (as if the whole file was in the encoding of the main
language).
+This is not true for files written by CJK-LyX; they are always in the locale
+encoding.
This function
- converts from fake unicode values to true unicode if forward is true, and
We do this here and not in LyX.py because it is far easier to do the
necessary parsing in modern formats than in ancient ones.
"""
+ if document.cjk_encoding != '':
+ return
encoding_stack = [document.encoding]
lang_re = re.compile(r"^\\lang\s(\S+)")
if document.inputencoding == "auto" or document.inputencoding == "default":
elif get_value(document.header, "\\inputencoding", i) == "utf8":
document.header[i] = "\\inputencoding auto"
document.inputencoding = get_value(document.header, "\\inputencoding", 0)
- document.encoding = get_encoding(document.language, document.inputencoding, 248)
+ document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
convert_multiencoding(document, False)
# Replace accented characters with InsetLaTeXAccent
# Do not convert characters that can be represented in the chosen
# encoding.
- encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
+ encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
lang_re = re.compile(r"^\\lang\s(\S+)")
for i in range(len(document.body)):
- if document.inputencoding == "auto" or document.inputencoding == "default":
+ if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
# Track the encoding of the current line
result = lang_re.match(document.body[i])
if result: