Consistent output of breakable/non-breakable dashes on all TeX engines.

[lyx.git] / lib / lyx2lyx / LyX.py
diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py

index 18088a4dea145e7296a9979f36ba5dcb4aa68324..c76fc589b38b16d68e95929934fd11dd6b8e6c4b 100644 (file)
--- a/lib/lyx2lyx/LyX.py
+++ b/lib/lyx2lyx/LyX.py
@@ -29,6 +29,7 @@ import sys
  import re
  import time
  import io
+import codecs
  
  try:
      import lyx2lyx_version
@@ -92,7 +93,7 @@ format_relation = [("0_06",    [200], minor_versions("0.6" , 4)),
                     ("2_0", list(range(346,414)), minor_versions("2.0" , 8)),
                     ("2_1", list(range(414,475)), minor_versions("2.1" , 5)),
                     ("2_2", list(range(475,509)), minor_versions("2.2" , 0)),
-                   ("2_3", list(range(509,511)), minor_versions("2.3" , 0))
+                   ("2_3", (), minor_versions("2.3" , 0))
                    ]
  
  ####################################################################
@@ -304,12 +305,20 @@ class LyX_base:
          # use latin1. This works since a) the parts we are interested in are
          # pure ASCII (subset of latin1) and b) in contrast to pure ascii or
          # utf8, one can decode any 8-bit string using latin1.
+        first_line = True
          while True:
              line = self.input.readline()
              if not line:
                  # eof found before end of header
                  self.error("Invalid LyX file: Missing body.")
  
+            if first_line:
+                # Remove UTF8 BOM marker if present
+                if line.startswith(codecs.BOM_UTF8):
+                    line = line[len(codecs.BOM_UTF8):]
+
+                first_line = False
+
              if PY2:
                  line = trim_eol(line)
                  decoded = line
@@ -345,7 +354,7 @@ class LyX_base:
              if check_token(decoded, '\\end_preamble'):
                  continue
  
-            line = line.strip()
+            line = line.rstrip()
              if not line:
                  continue
  
@@ -384,6 +393,7 @@ class LyX_base:
              self.inputencoding = get_value(self.header, b"\\inputencoding", 0,
                                             default = b"auto").decode('ascii')
          self.format = self.read_format()
+        self.initial_format = self.format
          self.encoding = get_encoding(self.language,
                                       self.inputencoding, self.format,
                                       self.cjk_encoding)
@@ -689,7 +699,6 @@ class LyX_base:
          conversion are taken.  It returns a list of modules needed to
          convert the LyX file from self.format to self.end_format"""
  
-        self.start =  self.format
          format = self.format
          correct_version = 0
  
@@ -723,7 +732,7 @@ class LyX_base:
  
          # Conversion mode, back or forth
          steps = []
-        if (initial_step, self.start) < (final_step, self.end_format):
+        if (initial_step, self.initial_format) < (final_step, self.end_format):
              mode = "convert"
              full_steps = []
              for step in format_relation: