\textcyr -> \textcyrillic

[lyx.git] / lib / lyx2lyx / LyX.py
diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py

index 6cbc6ffffad514898f8f4bba55ceb763735a5d3e..a410dfb77b55e485577d4fdae7d169a89b9063be 100644 (file)
--- a/lib/lyx2lyx/LyX.py
+++ b/lib/lyx2lyx/LyX.py
@@ -1,6 +1,6 @@
  # This file is part of lyx2lyx
  # -*- coding: utf-8 -*-
-# Copyright (C) 2002-2015 The LyX Team
+# Copyright (C) 2002-2018 The LyX Team
  # Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>
  # Copyright (C) 2002-2006 José Matos <jamatos@lyx.org>
  #
@@ -29,12 +29,13 @@ import sys
  import re
  import time
  import io
+import codecs
  
  try:
      import lyx2lyx_version
      version__ = lyx2lyx_version.version
  except: # we are running from build directory so assume the last version
-    version__ = '2.3'
+    version__ = '2.4'
  
  default_debug__ = 2
  
@@ -91,8 +92,9 @@ format_relation = [("0_06",    [200], minor_versions("0.6" , 4)),
                     ("1_6", list(range(277,346)), minor_versions("1.6" , 10)),
                     ("2_0", list(range(346,414)), minor_versions("2.0" , 8)),
                     ("2_1", list(range(414,475)), minor_versions("2.1" , 5)),
-                   ("2_2", list(range(475,509)), minor_versions("2.2" , 0)),
-                   ("2_3", list(range(509,510)), minor_versions("2.3" , 0))
+                   ("2_2", list(range(475,509)), minor_versions("2.2" , 4)),
+                   ("2_3", list(range(509,545)), minor_versions("2.3" , 0)),
+                   ("2_4", (), minor_versions("2.4" , 0))
                    ]
  
  ####################################################################
@@ -204,10 +206,10 @@ def get_encoding(language, inputencoding, format, cjk_encoding):
  class LyX_base:
      """This class carries all the information of the LyX file."""
  
-    def __init__(self, end_format = 0, input = "", output = "", error = "",
-                 debug = default_debug__, try_hard = 0, cjk_encoding = '',
-                 final_version = "", systemlyxdir = '', language = "english",
-                 encoding = "auto"):
+    def __init__(self, end_format = 0, input = u'', output = u'', error = u'',
+                 debug = default_debug__, try_hard = 0, cjk_encoding = u'',
+                 final_version = u'', systemlyxdir = u'', language = u'english',
+                 encoding = u'auto'):
  
          """Arguments:
          end_format: final format that the file should be converted. (integer)
@@ -304,12 +306,20 @@ class LyX_base:
          # use latin1. This works since a) the parts we are interested in are
          # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
          # utf8, one can decode any 8-bit string using latin1.
+        first_line = True
          while True:
              line = self.input.readline()
              if not line:
                  # eof found before end of header
                  self.error("Invalid LyX file: Missing body.")
  
+            if first_line:
+                # Remove UTF8 BOM marker if present
+                if line.startswith(codecs.BOM_UTF8):
+                    line = line[len(codecs.BOM_UTF8):]
+
+                first_line = False
+
              if PY2:
                  line = trim_eol(line)
                  decoded = line
@@ -317,7 +327,7 @@ class LyX_base:
                  line = trim_eol_binary(line)
                  decoded = line.decode('latin1')
              if check_token(decoded, '\\begin_preamble'):
-                while 1:
+                while True:
                      line = self.input.readline()
                      if not line:
                          # eof found before end of header
@@ -345,7 +355,7 @@ class LyX_base:
              if check_token(decoded, '\\end_preamble'):
                  continue
  
-            line = line.strip()
+            line = line.rstrip()
              if not line:
                  continue
  
@@ -384,6 +394,7 @@ class LyX_base:
              self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
                                             default = b"auto").decode('ascii')
          self.format = self.read_format()
+        self.initial_format = self.format
          self.encoding = get_encoding(self.language,
                                       self.inputencoding, self.format,
                                       self.cjk_encoding)
@@ -401,7 +412,7 @@ class LyX_base:
              self.body[i] = self.body[i].decode(self.encoding)
  
          # Read document body
-        while 1:
+        while True:
              line = self.input.readline().decode(self.encoding)
              if not line:
                  break
@@ -459,7 +470,7 @@ class LyX_base:
  
          # Since we do not know the encoding yet we need to read the input as
          # bytes in binary mode, and convert later to unicode.
-        if input and input != '-':
+        if input and input != u'-':
              self.dir = os.path.dirname(os.path.abspath(input))
              try:
                  gzip.open(input).readline()
@@ -469,7 +480,7 @@ class LyX_base:
                  self.input = open(input, 'rb')
                  self.compressed = False
          else:
-            self.dir = ''
+            self.dir = u''
              self.input = os.fdopen(sys.stdin.fileno(), 'rb')
              self.compressed = False
  
@@ -523,7 +534,7 @@ class LyX_base:
          " Set the header with the version used."
  
          initial_comment = " ".join(["#LyX %s created this file." % version__,
-                                    "For more info see http://www.lyx.org/"])
+                                    "For more info see https://www.lyx.org/"])
  
          # Simple heuristic to determine the comment that always starts
          # a lyx file
@@ -689,7 +700,6 @@ class LyX_base:
          conversion are taken.  It returns a list of modules needed to
          convert the LyX file from self.format to self.end_format"""
  
-        self.start =  self.format
          format = self.format
          correct_version = 0
  
@@ -723,7 +733,7 @@ class LyX_base:
  
          # Conversion mode, back or forth
          steps = []
-        if (initial_step, self.start) < (final_step, self.end_format):
+        if (initial_step, self.initial_format) < (final_step, self.end_format):
              mode = "convert"
              full_steps = []
              for step in format_relation:
@@ -767,7 +777,7 @@ class LyX_base:
  
  #        toc_par = []
  #        i = 0
-#        while 1:
+#        while True:
  #            i = find_tokens(self.body, sections, i)
  #            if i == -1:
  #                break
@@ -817,9 +827,9 @@ class LyX_base:
  class File(LyX_base):
      " This class reads existing LyX files."
  
-    def __init__(self, end_format = 0, input = "", output = "", error = "",
-                 debug = default_debug__, try_hard = 0, cjk_encoding = '',
-                 final_version = '', systemlyxdir = ''):
+    def __init__(self, end_format = 0, input = u'', output = u'', error = u'',
+                 debug = default_debug__, try_hard = 0, cjk_encoding = u'',
+                 final_version = u'', systemlyxdir = u''):
          LyX_base.__init__(self, end_format, input, output, error,
                            debug, try_hard, cjk_encoding, final_version,
                            systemlyxdir)