Add commandline switch to read files from CJK-LyX

author Georg Baum <Georg.Baum@post.rwth-aachen.de>

Tue, 13 Feb 2007 16:57:48 +0000 (16:57 +0000)

committer Georg Baum <Georg.Baum@post.rwth-aachen.de>

Tue, 13 Feb 2007 16:57:48 +0000 (16:57 +0000)
author Georg Baum <Georg.Baum@post.rwth-aachen.de>
Tue, 13 Feb 2007 16:57:48 +0000 (16:57 +0000)
committer Georg Baum <Georg.Baum@post.rwth-aachen.de>
Tue, 13 Feb 2007 16:57:48 +0000 (16:57 +0000)
diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py

index f71fdc200a18d973a22f2eb2ad1ab0fa8e33bcf8..4bd8aae94d6ec1dad88cc5874ab7fda766a69514 100644 (file)
--- a/lib/lyx2lyx/LyX.py
+++ b/lib/lyx2lyx/LyX.py
@@ -21,6 +21,7 @@ from parser_tools import get_value, check_token, find_token,\
       find_tokens, find_end_of
  import os.path
  import gzip
+import locale
  import sys
  import re
  import time
@@ -108,9 +109,17 @@ def trim_eol(line):
          return line[:-1]
  
  
-def get_encoding(language, inputencoding, format):
+def get_encoding(language, inputencoding, format, cjk_encoding):
      if format > 248:
          return "utf8"
+    # CJK-LyX encodes files using the current locale encoding.
+    # This means that files created by CJK-LyX can only be converted using
+    # the correct locale settings unless the encoding is given as commandline
+    # argument.
+    if cjk_encoding == 'auto':
+        return locale.getpreferredencoding()
+    elif cjk_encoding != '':
+        return cjk_encoding
      from lyx2lyx_lang import lang
      if inputencoding == "auto" or inputencoding == "default":
          return lang[language][3]
@@ -128,9 +137,9 @@ class LyX_Base:
      """This class carries all the information of the LyX file."""
      
      def __init__(self, end_format = 0, input = "", output = "", error
-                 = "", debug = default_debug_level, try_hard = 0, language = "english",
-                 encoding = "auto"):
-        
+                 = "", debug = default_debug_level, try_hard = 0, cjk_encoding = '',
+                 language = "english", encoding = "auto"):
+
          """Arguments:
          end_format: final format that the file should be converted. (integer)
          input: the name of the input source, if empty resort to standard input.
@@ -147,6 +156,7 @@ class LyX_Base:
  
          self.debug = debug
          self.try_hard = try_hard
+        self.cjk_encoding = cjk_encoding
  
          if end_format:
              self.end_format = self.lyxformat(end_format)
@@ -226,7 +236,7 @@ class LyX_Base:
          self.format  = self.read_format()
          self.language = get_value(self.header, "\\language", 0, default = "english")
          self.inputencoding = get_value(self.header, "\\inputencoding", 0, default = "auto")
-        self.encoding = get_encoding(self.language, self.inputencoding, self.format)
+        self.encoding = get_encoding(self.language, self.inputencoding, self.format, self.cjk_encoding)
          self.initial_version = self.read_version()
  
          # Second pass over header and preamble, now we know the file encoding
@@ -248,7 +258,7 @@ class LyX_Base:
          self.set_version()
          self.set_format()
          if self.encoding == "auto":
-            self.encoding = get_encoding(self.language, self.encoding, self.format)
+            self.encoding = get_encoding(self.language, self.encoding, self.format, self.cjk_encoding)
  
          if self.preamble:
              i = find_token(self.header, '\\textclass', 0) + 1
@@ -532,8 +542,8 @@ class LyX_Base:
  
  class File(LyX_Base):
      " This class reads existing LyX files."
-    def __init__(self, end_format = 0, input = "", output = "", error = "", debug = default_debug_level, try_hard = 0):
-        LyX_Base.__init__(self, end_format, input, output, error, debug, try_hard)
+    def __init__(self, end_format = 0, input = "", output = "", error = "", debug = default_debug_level, try_hard = 0, cjk_encoding = ''):
+        LyX_Base.__init__(self, end_format, input, output, error, debug, try_hard, cjk_encoding)
          self.read()
  
  
diff --git a/lib/lyx2lyx/lyx2lyx b/lib/lyx2lyx/lyx2lyx

index 82e9d8f522a60273f56f40871432549a4d43fadb..8c7b98b26320a229f31ff6822f49fd4538b8e1ee 100755 (executable)
--- a/lib/lyx2lyx/lyx2lyx
+++ b/lib/lyx2lyx/lyx2lyx
@@ -36,18 +36,23 @@ Options:
      -t, --to version           final version (optional)
      -o, --output name          name of the output file or else goes to stdout
      -n, --try-hard             try hard (ignore any convertion errors)
+    -c, --cjk [encoding]       files in format 248 and lower are read and
+                               written in the format of CJK-LyX.
+                               If encoding is not given or 'auto' the encoding
+                               is determined from the locale.
      -q, --quiet                        same as --debug=0"""
  
  
  def parse_options(argv):
-    _options =  ["help", "version", "list", "debug=", "err=", "from=", "to=", "output=", "try-hard", "quiet"]
+    _options =  ["help", "version", "list", "debug=", "err=", "from=", "to=", "output=", "try-hard", "cjk", "quiet"]
      try:
-       opts, args = getopt.getopt(argv[1:], "d:e:f:hlno:qt:v", _options)
+       opts, args = getopt.getopt(argv[1:], "c:d:e:f:hlno:qt:v", _options)
      except getopt.error:
          usage()
          sys.exit(2)
  
      end_format, input, output, error, debug, try_hard = 0, "", "", "", LyX.default_debug_level, 0
+    cjk_encoding = ''
      for o, a in opts:
          if o in ("-h", "--help"):
              usage()
@@ -71,15 +76,20 @@ def parse_options(argv):
              error = a
          if o in ("-n", "--try-hard"):
              try_hard = 1
+        if o in ("-c", "--cjk"):
+            if a == '':
+                cjk_encoding = 'auto'
+            else:
+                cjk_encoding = a
      if args:
          input = args[0]
  
-    return end_format, input, output, error, debug, try_hard
+    return end_format, input, output, error, debug, try_hard, cjk_encoding
  
  
  def main(argv):
-    end_format, input, output, error, debug, try_hard = parse_options(argv)
-    file = LyX.File(end_format, input, output, error, debug, try_hard)
+    end_format, input, output, error, debug, try_hard, cjk_encoding = parse_options(argv)
+    file = LyX.File(end_format, input, output, error, debug, try_hard, cjk_encoding)
  
      file.convert()
      file.write()
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index f647603335629f5b3aaeaa8d54dff311731ada70..deae7fc3b5b3ad3e9cf3799e02ba4e327162d07e 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -230,6 +230,8 @@ where at least two languages have different default encodings are encoded
  in multiple encodings for file formats < 249. These files are incorrectly
  read and written (as if the whole file was in the encoding of the main
  language).
+This is not true for files written by CJK-LyX, they are always in the locale
+encoding.
  
  This function
  - converts from fake unicode values to true unicode if forward is true, and
@@ -239,6 +241,8 @@ document.encoding must be set to the old value (format 248) in both cases.
  We do this here and not in LyX.py because it is far easier to do the
  necessary parsing in modern formats than in ancient ones.
  """
+    if document.cjk_encoding != '':
+        return
      encoding_stack = [document.encoding]
      lang_re = re.compile(r"^\\lang\s(\S+)")
      if document.inputencoding == "auto" or document.inputencoding == "default":
@@ -292,7 +296,7 @@ def revert_utf8(document):
      elif get_value(document.header, "\\inputencoding", i) == "utf8":
          document.header[i] = "\\inputencoding auto"
      document.inputencoding = get_value(document.header, "\\inputencoding", 0)
-    document.encoding = get_encoding(document.language, document.inputencoding, 248)
+    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
      convert_multiencoding(document, False)
  
  
@@ -1016,11 +1020,11 @@ def revert_accent(document):
      # Replace accented characters with InsetLaTeXAccent
      # Do not convert characters that can be represented in the chosen
      # encoding.
-    encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
+    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
      lang_re = re.compile(r"^\\lang\s(\S+)")
      for i in range(len(document.body)):
  
-        if document.inputencoding == "auto" or document.inputencoding == "default":
+        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
              # Track the encoding of the current line
              result = lang_re.match(document.body[i])
              if result:
author	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Tue, 13 Feb 2007 16:57:48 +0000 (16:57 +0000)
committer	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Tue, 13 Feb 2007 16:57:48 +0000 (16:57 +0000)
lib/lyx2lyx/LyX.py		patch | blob | history
lib/lyx2lyx/lyx2lyx		patch | blob | history
lib/lyx2lyx/lyx_1_5.py		patch | blob | history