#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
""" Convert files to the file format generated by lyx 1.5"""
from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from LyX import get_encoding
+# Provide support for both python 2 and 3
+# PY2 is True when the running interpreter's major version is 2.
+# text_type and unichr are bound to whichever names the running interpreter
+# provides, so the rest of the module can use them unconditionally.
+PY2 = sys.version_info[0] == 2
+if not PY2:
+ # not python 2: the names unicode/unichr are replaced by str/chr
+ text_type = str
+ unichr = chr
+else:
+ # python 2: bind the builtin unicode type and unichr function
+ text_type = unicode
+ unichr = unichr
+# End of code to support both python 2 and 3
####################################################################
# Private helper functions
if font_scheme == '':
document.warning("Malformed LyX document: Empty `\\fontscheme'.")
font_scheme = 'default'
- if not font_scheme in roman_fonts.keys():
+ if not font_scheme in list(roman_fonts.keys()):
document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
font_scheme = 'default'
document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
del document.header[i]
if font_tt_scale != '100':
document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
- for font_scheme in roman_fonts.keys():
+ for font_scheme in list(roman_fonts.keys()):
if (roman_fonts[font_scheme] == fonts['roman'] and
sans_fonts[font_scheme] == fonts['sans'] and
typewriter_fonts[font_scheme] == fonts['typewriter']):
inset_result = inset_re.match(document.body[i])
if inset_result:
insets.append(inset_result.group(1))
- else:
+ else:
insets.append("")
elif find_token(document.body, "\\end_inset", i, i + 1) == i:
del insets[-1]
convert_multiencoding(document, False)
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
def read_unicodesymbols():
" Read the unicodesymbols list of unicode characters and corresponding commands."
pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
# Define strings to start and end ERT and math insets
- ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
+ ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
ert_outro='\n\\end_layout\n\n\\end_inset\n'
math_intro='\n\\begin_inset Formula $'
math_outro='$\n\\end_inset'
last_char = character
except:
# Try to replace with ERT/math inset
- if spec_chars.has_key(character):
+ if character in spec_chars:
command = spec_chars[character][0] # the command to replace unicode
flag1 = spec_chars[character][1]
flag2 = spec_chars[character][2]
command = command + '}'
elif not insets or insets[-1] != "ERT":
# add an ERT inset with the replacement character
- command = command.replace('\\\\', ert_intro)
- command = command + ert_outro
+ command = command.replace('\\\\', '\n\\backslash\n')
+ command = ert_intro + command + ert_outro
else:
command = command.replace('\\\\', '\n\\backslash\n')
last_char = '' # indicate that the character should not be removed
insets.append(line[13:].split()[0])
if line.find('\\end_inset') > -1:
del insets[-1]
-
+
# Try to write the line
try:
# If all goes well the line is written here
if commandparams_info[name][0] == "":
document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
if option2 != "":
if commandparams_info[name][1] == "":
document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
if argument != "":
if commandparams_info[name][2] == "":
document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
document.body[i:i+1] = lines
i = i + 1
if i == -1:
break
name = document.body[i].split()[2]
- j = find_end_of_inset(document.body, i + 1)
+ j = find_end_of_inset(document.body, i)
preview_line = ""
option1 = ""
option2 = ""
preview_line = document.body[k]
elif (commandparams_info[name][0] != "" and
pname == commandparams_info[name][0]):
- option1 = pvalue.strip('"').replace('\\"', '"')
+ option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif (commandparams_info[name][1] != "" and
pname == commandparams_info[name][1]):
- option2 = pvalue.strip('"').replace('\\"', '"')
+ option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif (commandparams_info[name][2] != "" and
pname == commandparams_info[name][2]):
- argument = pvalue.strip('"').replace('\\"', '"')
+ argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif document.body[k].strip() != "":
document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
if name == "bibitem":
lines.append('')
lines.append('\\end_inset')
document.body[i:j+1] = lines
- i = j + 1
+ i += len(lines) + 1
def revert_nomenclature(document):
return last_tokens.find('\\') != -1
+# A wrapper around unicodedata.normalize that handles special cases (cf. bug 3313)
+# The characters 0x2126 (OHM) and 0x212b (ANGSTROM) are copied to the output
+# unchanged; every run of other characters between them is passed to
+# unicodedata.normalize(form, run) and the pieces are concatenated in order.
+# NOTE(review): presumably these two are protected because normalization would
+# replace them with different code points -- confirm against bug 3313.
+# form: normalization form handed unchanged to unicodedata.normalize (the
+#       callers in this file pass "NFC" and "NFD").
+# text: the string to normalize.
+# Returns the normalized string. Nothing is caught here, so exceptions raised
+# by unicodedata.normalize propagate (revert_accent catches TypeError).
+def normalize(form, text):
+ # do not normalize OHM, ANGSTROM
+ keep_characters = [0x2126,0x212b]
+ # result: output assembled so far
+ result = ''
+ # convert: pending run of characters that still has to be normalized
+ convert = ''
+ for i in text:
+ if ord(i) in keep_characters:
+ # flush the pending run before copying the protected character verbatim
+ if len(convert) > 0:
+ result = result + unicodedata.normalize(form, convert)
+ convert = ''
+ result = result + i
+ else:
+ convert = convert + i
+ # normalize whatever is left after the last protected character
+ if len(convert) > 0:
+ result = result + unicodedata.normalize(form, convert)
+ return result
+
+
def revert_accent(document):
inverse_accent_map = {}
for k in accent_map:
# because we never use u'xxx' for string literals, but 'xxx'.
# Therefore we may have to try two times to normalize the data.
try:
- document.body[i] = unicodedata.normalize("NFD", document.body[i])
+ document.body[i] = normalize("NFD", document.body[i])
except TypeError:
- document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
+ document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))
# Replace accented characters with InsetLaTeXAccent
# Do not convert characters that can be represented in the chosen
accented_char = inverse_accented_map[accented_char]
accent = document.body[i][j]
try:
- dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
+ dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
except UnicodeEncodeError:
# Insert the rest of the line as new line
if j < len(document.body[i]) - 1:
# Normalize to "Normal form C" (NFC, pre-composed characters) again
for i in range(len(document.body)):
- document.body[i] = unicodedata.normalize("NFC", document.body[i])
+ document.body[i] = normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
""" Before format 259 the font changes were ignored if a
whitespace was the first or last character in the sequence, this function
transfers the whitespace outside."""
-
+
char_properties = {"\\series": "default",
"\\emph": "default",
"\\color": "none",
def normalize_font_whitespace_274(document):
""" Before format 259 (sic) the font changes were ignored if a
- whitespace was the first or last character in the sequence. This was
- corrected for most font properties in format 259, but the language
+ whitespace was the first or last character in the sequence. This was
+ corrected for most font properties in format 259, but the language
was forgotten then. This function applies the same conversion done
there (namely, transfers the whitespace outside) for font language
changes, as well."""
def get_paragraph_language(document, i):
""" Return the language of the paragraph in which line i of the document
body is. If the first thing in the paragraph is a \\lang command, that
- is the paragraph's langauge; otherwise, the paragraph's language is the
+ is the paragraph's language; otherwise, the paragraph's language is the
document's language."""
lines = document.body
-
+
first_nonempty_line = \
find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
return words[1]
else:
return document.language
-
+
def normalize_font_whitespace(document, char_properties):
""" Before format 259 the font changes were ignored if a
whitespace was the first or last character in the sequence, this function
# a new paragraph resets all font changes
changes.clear()
# also reset the default language to be the paragraph's language
- if "\\lang" in char_properties.keys():
+ if "\\lang" in list(char_properties.keys()):
char_properties["\\lang"] = \
get_paragraph_language(document, i + 1)
- elif len(words) > 1 and words[0] in char_properties.keys():
+ elif len(words) > 1 and words[0] in list(char_properties.keys()):
# we have a font change
if char_properties[words[0]] == words[1]:
# property gets reset
- if words[0] in changes.keys():
+ if words[0] in list(changes.keys()):
del changes[words[0]]
defaultproperty = True
else:
lines[i-1] = lines[i-1][:-1]
# a space before the font change
added_lines = [" "]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that is already in lines[i]
if k != words[0]:
added_lines[1:1] = ["%s %s" % (k, changes[k])]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that must be added below anyway
if k != words[0]:
added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
continue
lines[i+1] = lines[i+1][1:]
added_lines = [" "]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that is already in lines[i]
if k != words[0]:
added_lines[1:1] = ["%s %s" % (k, changes[k])]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that must be added below anyway
if k != words[0]:
added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
def convert_tableborder(document):
- # The problematic is: LyX double the table cell border as it ignores the "|" character in
+ # The problem is: LyX doubles the table cell border as it ignores the "|" character in
# the cell arguments. A fix takes care of this and therefore the "|" has to be removed
i = 0
while i < len(document.body):
# the two tokens have to be in one line
if (h != -1 and k != -1):
# delete the "|"
- document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
+ document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])]
i = i + 1
def revert_armenian(document):
-
- # set inputencoding from armscii8 to auto
+
+ # set inputencoding from armscii8 to auto
if document.inputencoding == "armscii8":
i = find_token(document.header, "\\inputencoding", 0)
if i != -1:
document.header[i] = "\\inputencoding auto"
- # check if preamble exists, if not k is set to -1
+ # check if preamble exists, if not k is set to -1
i = 0
k = -1
while i < len(document.preamble):
# create the preamble when it doesn't exist
else:
document.preamble.append('\\usepackage{armtex}')
- # Set document language from armenian to english
+ # Set document language from armenian to english
if document.language == "armenian":
document.language = "english"
i = find_token(document.header, "\\language", 0)
def revert_listings_inset(document):
- r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
+ r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM
-\begin_inset
+\begin_inset
lstparams "language=Delphi"
inline true
status open
k = cap_end + 1
inlinecode = ''
# looking for the oneline code for lstinline
- inlinecode = document.body[find_end_of_layout(document.body,
+ inlinecode = document.body[find_end_of_layout(document.body,
find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
if len(caption) > 0:
if len(params) == 0:
document.body[i:(j+1)] = [r'\begin_inset ERT',
'status %s' % status,
r'\begin_layout %s' % document.default_layout,
- '',
+ '',
'',
r'\backslash',
'lstinline%s{%s}' % (params, inlinecode),
r'\end_layout',
'',
r'\end_inset']
-
+
def revert_include_listings(document):
r''' Revert lstinputlisting Include option , translate
# find command line lstinputlisting{file}[options]
cmd, file, option = '', '', ''
if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
- cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
+ cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
option = option.replace('\\', '\\backslash\n')
document.body[i : j + 1] = [r'\begin_inset ERT',
'status open',