#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
""" Convert files to the file format generated by lyx 1.5"""
from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from LyX import get_encoding
+# Provide support for both python 2 and 3
+PY2 = sys.version_info[0] == 2
+if not PY2:
+ text_type = str
+ unichr = chr
+else:
+ text_type = unicode
+# End of code to support for both python 2 and 3
####################################################################
# Private helper functions
def revert_framed(document):
"Revert framed notes. "
i = 0
- while 1:
+ while True:
i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
if i == -1:
if font_scheme == '':
document.warning("Malformed LyX document: Empty `\\fontscheme'.")
font_scheme = 'default'
- if not font_scheme in roman_fonts.keys():
+ if not font_scheme in list(roman_fonts.keys()):
document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
font_scheme = 'default'
document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
del document.header[i]
if font_tt_scale != '100':
document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
- for font_scheme in roman_fonts.keys():
+ for font_scheme in list(roman_fonts.keys()):
if (roman_fonts[font_scheme] == fonts['roman'] and
sans_fonts[font_scheme] == fonts['sans'] and
typewriter_fonts[font_scheme] == fonts['typewriter']):
re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset Tabular", i)
if i == -1:
return
We do this here and not in LyX.py because it is far easier to do the
necessary parsing in modern formats than in ancient ones.
"""
+ inset_types = ["Foot", "Note"]
if document.cjk_encoding != '':
return
encoding_stack = [document.encoding]
+ insets = []
lang_re = re.compile(r"^\\lang\s(\S+)")
+ inset_re = re.compile(r"^\\begin_inset\s(\S+)")
+ if not forward: # no need to read file unless we are reverting
+ spec_chars = read_unicodesymbols()
+
if document.inputencoding == "auto" or document.inputencoding == "default":
- for i in range(len(document.body)):
+ i = 0
+ while i < len(document.body):
result = lang_re.match(document.body[i])
if result:
language = result.group(1)
encoding_stack[-1] = lang[language][3]
elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
- encoding_stack.append(encoding_stack[-1])
+ if len(insets) > 0 and insets[-1] in inset_types:
+ from lyx2lyx_lang import lang
+ encoding_stack.append(lang[document.language][3])
+ else:
+ encoding_stack.append(encoding_stack[-1])
elif find_token(document.body, "\\end_layout", i, i + 1) == i:
document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
if len(encoding_stack) == 1:
document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
else:
del encoding_stack[-1]
+ elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
+ inset_result = inset_re.match(document.body[i])
+ if inset_result:
+ insets.append(inset_result.group(1))
+ else:
+ insets.append("")
+ elif find_token(document.body, "\\end_inset", i, i + 1) == i:
+ del insets[-1]
if encoding_stack[-1] != document.encoding:
if forward:
# This line has been incorrectly interpreted as if it was
# with the correct encoding.
document.body[i] = orig.decode(encoding_stack[-1])
else:
- # Convert unicode to the 8bit string that will be written
- # to the file with the correct encoding.
- orig = document.body[i].encode(encoding_stack[-1])
- # Convert the 8bit string that will be written to the
- # file to fake unicode with the encoding that will later
- # be used when writing to the file.
- document.body[i] = orig.decode(document.encoding)
+ try:
+ # Convert unicode to the 8bit string that will be written
+ # to the file with the correct encoding.
+ orig = document.body[i].encode(encoding_stack[-1])
+ # Convert the 8bit string that will be written to the
+ # file to fake unicode with the encoding that will later
+ # be used when writing to the file.
+ document.body[i] = orig.decode(document.encoding)
+ except:
+ mod_line = revert_unicode_line(document, i, insets, spec_chars)
+ document.body[i:i+1] = mod_line.split('\n')
+ i += len(mod_line.split('\n')) - 1
+ i += 1
def convert_utf8(document):
convert_multiencoding(document, False)
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
+def read_unicodesymbols():
+ " Read the unicodesymbols list of unicode characters and corresponding commands."
+ # NOTE(review): str.strip('lyx2lyx') removes any run of the characters
+ # 'l', 'y', 'x', '2' from both ends of the path -- NOT the substring
+ # 'lyx2lyx'. It happens to work for the usual install layout (the script
+ # lives in a 'lyx2lyx' directory) but is fragile; presumably one of the
+ # bugs the FIXME above refers to.
+ pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+ fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
+ spec_chars = {}
+ for line in fp.readlines():
+ if line[0] != '#':
+ line=line.replace(' "',' ') # remove all quotation marks with spaces before
+ line=line.replace('" ',' ') # remove all quotation marks with spaces after
+ line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+ try:
+ # flag1 and flag2 are preamble and other flags
+ # eval(ucs4) evaluates the code-point field of the line
+ # (presumably a numeric literal -- TODO confirm against the
+ # 'unicodesymbols' file format); malformed lines are silently
+ # skipped by the bare except below.
+ [ucs4,command,flag1,flag2] =line.split(None,3)
+ spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
+ except:
+ pass
+ # NOTE(review): fp is not closed if an exception escapes the loop above.
+ fp.close()
+ return spec_chars
+
+
+def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
+ """Return a replacement for body line i, in which every character that
+ cannot be encoded in document.encoding has been converted to an ERT or
+ math inset command (per spec_chars, read from the 'unicodesymbols'
+ file), or to replacement_character when no command is known.
+
+ The returned string may contain embedded newlines (callers split it
+ back into body lines). insets is the stack of currently open inset
+ names; it decides whether commands are emitted as ERT, as math, or
+ inline. Side effect: when a combining character must be attached to
+ the last character of the PREVIOUS body line, that character is
+ removed from document.body[i-1].
+ """
+ # Define strings to start and end ERT and math insets
+ ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
+ ert_outro='\n\\end_layout\n\n\\end_inset\n'
+ math_intro='\n\\begin_inset Formula $'
+ math_outro='$\n\\end_inset'
+
+ mod_line = u''
+ # Seed last_char from the end of the previous line so a leading
+ # combining character can attach to it (unless that line is an inset).
+ if i and not is_inset_line(document, i-1):
+ last_char = document.body[i - 1][-1:]
+ else:
+ last_char = ''
+
+ line = document.body[i]
+ for character in line:
+ try:
+ # Try to write the character
+ dummy = character.encode(document.encoding)
+ mod_line += character
+ last_char = character
+ except:
+ # Try to replace with ERT/math inset
+ if character in spec_chars:
+ command = spec_chars[character][0] # the command to replace unicode
+ flag1 = spec_chars[character][1]
+ flag2 = spec_chars[character][2]
+ if flag1.find('combining') > -1 or flag2.find('combining') > -1:
+ # We have a character that should be combined with the previous
+ command += '{' + last_char + '}'
+ # Remove the last character. Ignore if it is whitespace
+ if len(last_char.rstrip()):
+ # last_char was found and is not whitespace
+ if mod_line:
+ mod_line = mod_line[:-1]
+ else: # last_char belongs to the last line
+ document.body[i-1] = document.body[i-1][:-1]
+ else:
+ # The last character was replaced by a command. For now it is
+ # ignored. This could be handled better.
+ pass
+ # Commands from the unicodesymbols file arrive with doubled
+ # backslashes ('\\\\'); rewrite them for the target context.
+ if command[0:2] == '\\\\':
+ if command[2:12]=='ensuremath':
+ if insets and insets[-1] == "ERT":
+ # math in ERT
+ command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+ command = command.replace('}', '$\n')
+ elif not insets or insets[-1] != "Formula":
+ # add a math inset with the replacement character
+ command = command.replace('\\\\ensuremath{\\', math_intro)
+ command = command.replace('}', math_outro)
+ else:
+ # we are already in a math inset
+ command = command.replace('\\\\ensuremath{\\', '')
+ command = command.replace('}', '')
+ else:
+ if insets and insets[-1] == "Formula":
+ # avoid putting an ERT in a math; instead put command as text
+ command = command.replace('\\\\', '\mathrm{')
+ command = command + '}'
+ elif not insets or insets[-1] != "ERT":
+ # add an ERT inset with the replacement character
+ command = command.replace('\\\\', '\n\\backslash\n')
+ command = ert_intro + command + ert_outro
+ else:
+ command = command.replace('\\\\', '\n\\backslash\n')
+ last_char = '' # indicate that the character should not be removed
+ mod_line += command
+ else:
+ # Replace with replacement string
+ mod_line += replacement_character
+ return mod_line
+
+
+def revert_unicode(document):
+ '''Transform unicode characters that can not be written using the
+document encoding to commands according to the unicodesymbols
+file. Characters that can not be replaced by commands are replaced by
+a replacement string. Flags other than 'combined' are currently not
+implemented.'''
+ spec_chars = read_unicodesymbols()
+ insets = [] # list of active insets
+
+ # Go through the document to capture all combining characters
+ i = 0
+ while i < len(document.body):
+ line = document.body[i]
+ # Check for insets
+ # NOTE(review): line[13:] skips past '\begin_inset ' to take the
+ # inset type as the next token; this would raise IndexError on a
+ # '\begin_inset' line with no type, and '\end_inset' on an empty
+ # stack would also raise -- assumes well-formed documents.
+ if line.find('\\begin_inset') > -1:
+ insets.append(line[13:].split()[0])
+ if line.find('\\end_inset') > -1:
+ del insets[-1]
+
+ # Try to write the line
+ try:
+ # If all goes well the line is written here
+ dummy = line.encode(document.encoding)
+ i += 1
+ except:
+ # Error, some character(s) in the line need to be replaced
+ mod_line = revert_unicode_line(document, i, insets, spec_chars)
+ # The replacement may span several lines; splice them in and
+ # continue scanning after the inserted material.
+ document.body[i:i+1] = mod_line.split('\n')
+ i += len(mod_line.split('\n'))
+
+
def revert_cs_label(document):
" Remove status flag of charstyle label. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset CharStyle", i)
if i == -1:
return
# Seach for a line starting 'show_label'
# If it is not there, break with a warning message
i = i + 1
- while 1:
+ while True:
if (document.body[i][:10] == "show_label"):
del document.body[i]
break
This must be called after convert_commandparams.
"""
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\bibitem", i)
if i == -1:
break
# convert_bibitem()), but could be read in, so we convert it here, too.
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset LatexCommand", i)
if i == -1:
break
i = i + 1
continue
+ j = find_token(document.body, "\\end_inset", i + 1)
+ if j == -1:
+ document.warning("Malformed document")
+ else:
+ command += "".join(document.body[i+1:j])
+ document.body[i+1:j] = []
+
# The following parser is taken from the original InsetCommandParams::scanCommand
name = ""
option1 = ""
if commandparams_info[name][0] == "":
document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
if option2 != "":
if commandparams_info[name][1] == "":
document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
if argument != "":
if commandparams_info[name][2] == "":
document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
document.body[i:i+1] = lines
i = i + 1
def revert_commandparams(document):
regex = re.compile(r'(\S+)\s+(.+)')
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset LatexCommand", i)
if i == -1:
break
name = document.body[i].split()[2]
- j = find_end_of_inset(document.body, i + 1)
+ j = find_end_of_inset(document.body, i)
preview_line = ""
option1 = ""
option2 = ""
preview_line = document.body[k]
elif (commandparams_info[name][0] != "" and
pname == commandparams_info[name][0]):
- option1 = pvalue.strip('"').replace('\\"', '"')
+ option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif (commandparams_info[name][1] != "" and
pname == commandparams_info[name][1]):
- option2 = pvalue.strip('"').replace('\\"', '"')
+ option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif (commandparams_info[name][2] != "" and
pname == commandparams_info[name][2]):
- argument = pvalue.strip('"').replace('\\"', '"')
+ argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif document.body[k].strip() != "":
document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
if name == "bibitem":
lines.append('')
lines.append('\\end_inset')
document.body[i:j+1] = lines
- i = j + 1
+ i += len(lines) + 1
def revert_nomenclature(document):
regex = re.compile(r'(\S+)\s+(.+)')
i = 0
use_nomencl = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
if i == -1:
break
regex = re.compile(r'(\S+)\s+(.+)')
i = 0
use_nomencl = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
if i == -1:
break
def revert_clearpage(document):
" clearpage -> ERT "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\clearpage", i)
if i == -1:
break
def revert_cleardoublepage(document):
" cleardoublepage -> ERT "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\cleardoublepage", i)
if i == -1:
break
def convert_caption(document):
" Convert caption layouts to caption insets. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_layout Caption", i)
if i == -1:
return
" Convert caption insets to caption layouts. "
" This assumes that the text class has a caption style. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset Caption", i)
if i == -1:
return
"=" : u'\u0304', # macron
"u" : u'\u0306', # breve
"." : u'\u0307', # dot above
- "\"": u'\u0308', # diaresis
+ "\"": u'\u0308', # diaeresis
"r" : u'\u030a', # ring above
"H" : u'\u030b', # double acute
"v" : u'\u030c', # caron
re_contents = re.compile(r'^([^\s{]+)(.*)$')
re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
i = 0
- while 1:
+ while True:
i = find_re(document.body, re_wholeinset, i)
if i == -1:
return
i += 3
+def is_inset_line(document, i):
+ """ Line i of body has an inset """
+ # Heuristic: a line that opens with a backslash is a LyX command/inset
+ # line; otherwise look for a backslash within the last two
+ # whitespace-separated tokens (an inline inset command ending the line).
+ if document.body[i][:1] == '\\':
+ return True
+ last_tokens = "".join(document.body[i].split()[-2:])
+ return last_tokens.find('\\') != -1
+
+
+# A wrapper around normalize that handles special cases (cf. bug 3313)
+def normalize(form, text):
+ """Like unicodedata.normalize(form, text), but passes U+2126 (OHM SIGN)
+ and U+212B (ANGSTROM SIGN) through untouched; the runs of text between
+ them are normalized segment by segment.
+ """
+ # do not normalize OHM, ANGSTROM
+ keep_characters = [0x2126,0x212b]
+ result = ''
+ convert = ''
+ for i in text:
+ if ord(i) in keep_characters:
+ # Flush the pending normalizable segment, then emit the
+ # protected character verbatim.
+ if len(convert) > 0:
+ result = result + unicodedata.normalize(form, convert)
+ convert = ''
+ result = result + i
+ else:
+ convert = convert + i
+ # Flush the trailing segment.
+ if len(convert) > 0:
+ result = result + unicodedata.normalize(form, convert)
+ return result
+
+
def revert_accent(document):
inverse_accent_map = {}
for k in accent_map:
# words before unicode normalization.
# We do this only if the next line starts with an accent, otherwise we
# would create things like '\begin_inset ERTstatus'.
- numberoflines = len(document.body)
- for i in range(numberoflines-1):
+ for i in range(len(document.body) - 1):
if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
continue
- if (document.body[i+1][0] in inverse_accent_map):
+ if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
# the last character of this line and the first of the next line
- # form probably a surrogate pair.
+ # form probably a surrogate pair, inline insets are excluded (second part of the test)
while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
document.body[i] += document.body[i+1][0]
document.body[i+1] = document.body[i+1][1:]
# Normalize to "Normal form D" (NFD, also known as canonical decomposition).
# This is needed to catch all accented characters.
- for i in range(numberoflines):
+ for i in range(len(document.body)):
# Unfortunately we have a mixture of unicode strings and plain strings,
# because we never use u'xxx' for string literals, but 'xxx'.
# Therefore we may have to try two times to normalize the data.
try:
- document.body[i] = unicodedata.normalize("NFD", document.body[i])
+ document.body[i] = normalize("NFD", document.body[i])
except TypeError:
- document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
+ document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))
# Replace accented characters with InsetLaTeXAccent
# Do not convert characters that can be represented in the chosen
# encoding.
encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
lang_re = re.compile(r"^\\lang\s(\S+)")
- for i in range(len(document.body)):
+ i = 0
+ while i < len(document.body):
if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
# Track the encoding of the current line
result = lang_re.match(document.body[i])
except UnicodeEncodeError:
# Insert the rest of the line as new line
if j < len(document.body[i]) - 1:
- document.body[i+1:i+1] = document.body[i][j+1:]
+ document.body.insert(i+1, document.body[i][j+1:])
# Delete the accented character
- if j > 0:
- document.body[i] = document.body[i][:j-1]
- else:
- document.body[i] = u''
+ document.body[i] = document.body[i][:j]
# Finally add the InsetLaTeXAccent
document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
break
accented_char = inverse_accented_map[accented_char]
accent = document.body[i][j]
try:
- dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
+ dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
except UnicodeEncodeError:
# Insert the rest of the line as new line
if j < len(document.body[i]) - 1:
- document.body[i+1:i+1] = document.body[i][j+1:]
+ document.body.insert(i+1, document.body[i][j+1:])
# Delete the accented characters
- if j > 1:
- document.body[i] = document.body[i][:j-2]
- else:
- document.body[i] = u''
+ document.body[i] = document.body[i][:j-1]
# Finally add the InsetLaTeXAccent
document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
break
+ i = i + 1
+
# Normalize to "Normal form C" (NFC, pre-composed characters) again
- for i in range(numberoflines):
- document.body[i] = unicodedata.normalize("NFC", document.body[i])
+ for i in range(len(document.body)):
+ document.body[i] = normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
""" Before format 259 the font changes were ignored if a
whitespace was the first or last character in the sequence, this function
transfers the whitespace outside."""
-
+
char_properties = {"\\series": "default",
"\\emph": "default",
"\\color": "none",
def normalize_font_whitespace_274(document):
""" Before format 259 (sic) the font changes were ignored if a
- whitespace was the first or last character in the sequence. This was
- corrected for most font properties in format 259, but the language
+ whitespace was the first or last character in the sequence. This was
+ corrected for most font properties in format 259, but the language
was forgotten then. This function applies the same conversion done
there (namely, transfers the whitespace outside) for font language
changes, as well."""
def get_paragraph_language(document, i):
""" Return the language of the paragraph in which line i of the document
body is. If the first thing in the paragraph is a \\lang command, that
- is the paragraph's langauge; otherwise, the paragraph's language is the
+ is the paragraph's language; otherwise, the paragraph's language is the
document's language."""
lines = document.body
-
+
first_nonempty_line = \
find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
return words[1]
else:
return document.language
-
+
def normalize_font_whitespace(document, char_properties):
""" Before format 259 the font changes were ignored if a
whitespace was the first or last character in the sequence, this function
# a new paragraph resets all font changes
changes.clear()
# also reset the default language to be the paragraph's language
- if "\\lang" in char_properties.keys():
+ if "\\lang" in list(char_properties.keys()):
char_properties["\\lang"] = \
get_paragraph_language(document, i + 1)
- elif len(words) > 1 and words[0] in char_properties.keys():
+ elif len(words) > 1 and words[0] in list(char_properties.keys()):
# we have a font change
if char_properties[words[0]] == words[1]:
# property gets reset
- if words[0] in changes.keys():
+ if words[0] in list(changes.keys()):
del changes[words[0]]
defaultproperty = True
else:
lines[i-1] = lines[i-1][:-1]
# a space before the font change
added_lines = [" "]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that is already in lines[i]
if k != words[0]:
added_lines[1:1] = ["%s %s" % (k, changes[k])]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that must be added below anyway
if k != words[0]:
added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
continue
lines[i+1] = lines[i+1][1:]
added_lines = [" "]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that is already in lines[i]
if k != words[0]:
added_lines[1:1] = ["%s %s" % (k, changes[k])]
- for k in changes.keys():
+ for k in list(changes.keys()):
# exclude property k because that must be added below anyway
if k != words[0]:
added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
def revert_beamer_alert(document):
" Revert beamer's \\alert inset back to ERT. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
if i == -1:
return
document.body[i] = "\\begin_inset ERT"
i = i + 1
- while 1:
+ while True:
if (document.body[i][:13] == "\\begin_layout"):
# Insert the \alert command
document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
def revert_beamer_structure(document):
" Revert beamer's \\structure inset back to ERT. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
if i == -1:
return
document.body[i] = "\\begin_inset ERT"
i = i + 1
- while 1:
+ while True:
if (document.body[i][:13] == "\\begin_layout"):
document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
break
def convert_graphics_rotation(document):
" add scaleBeforeRotation graphics parameter. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset Graphics", i)
if i == -1:
return
def revert_graphics_rotation(document):
" remove scaleBeforeRotation graphics parameter. "
i = 0
- while 1:
+ while True:
i = find_token(document.body, "\\begin_inset Graphics", i)
if i == -1:
return
def convert_tableborder(document):
- # The problematic is: LyX double the table cell border as it ignores the "|" character in
+ # The problem is: LyX doubles the table cell border as it ignores the "|" character in
# the cell arguments. A fix takes care of this and therefore the "|" has to be removed
i = 0
while i < len(document.body):
# the two tokens have to be in one line
if (h != -1 and k != -1):
# delete the "|"
- document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
+ document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])]
i = i + 1
def revert_armenian(document):
-
- # set inputencoding from armscii8 to auto
+
+ # set inputencoding from armscii8 to auto
if document.inputencoding == "armscii8":
i = find_token(document.header, "\\inputencoding", 0)
if i != -1:
document.header[i] = "\\inputencoding auto"
- # check if preamble exists, if not k is set to -1
+ # check if preamble exists, if not k is set to -1
i = 0
k = -1
while i < len(document.preamble):
# create the preamble when it doesn't exist
else:
document.preamble.append('\\usepackage{armtex}')
- # Set document language from armenian to english
+ # Set document language from armenian to english
if document.language == "armenian":
document.language = "english"
i = find_token(document.header, "\\language", 0)
def revert_listings_inset(document):
- r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
+ r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM
-\begin_inset
+\begin_inset
lstparams "language=Delphi"
inline true
status open
k = cap_end + 1
inlinecode = ''
# looking for the oneline code for lstinline
- inlinecode = document.body[find_end_of_layout(document.body,
- find_token(document.body, '\\begin_layout Standard', i + 1) +1 ) - 1]
+ inlinecode = document.body[find_end_of_layout(document.body,
+ find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
if len(caption) > 0:
if len(params) == 0:
params = 'caption={%s}' % caption
if inline == 'true':
document.body[i:(j+1)] = [r'\begin_inset ERT',
'status %s' % status,
- r'\begin_layout Standard',
- '',
+ r'\begin_layout %s' % document.default_layout,
+ '',
'',
r'\backslash',
'lstinline%s{%s}' % (params, inlinecode),
document.body[i: j+1] = [r'\begin_inset ERT',
'status %s' % status,
'',
- r'\begin_layout Standard',
+ r'\begin_layout %s' % document.default_layout,
'',
'',
r'\backslash',
r'begin{lstlisting}%s' % params,
- r'\end_layout'
+ r'\end_layout',
+ '',
+ r'\begin_layout %s' % document.default_layout,
] + document.body[k : j - 1] + \
['',
- r'\begin_layout Standard',
+ r'\begin_layout %s' % document.default_layout,
'',
r'\backslash',
'end{lstlisting}',
r'\end_layout',
'',
r'\end_inset']
-
+
def revert_include_listings(document):
r''' Revert lstinputlisting Include option , translate
# find command line lstinputlisting{file}[options]
cmd, file, option = '', '', ''
if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
- cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
+ cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
option = option.replace('\\', '\\backslash\n')
document.body[i : j + 1] = [r'\begin_inset ERT',
'status open',
'',
- r'\begin_layout Standard',
+ r'\begin_layout %s' % document.default_layout,
'',
'',
r'\backslash',
else:
del document.header[i]
+
def revert_separator_layout(document):
r'''Revert --Separator-- to a lyx note
From
if j == -1:
# this should not happen
break
- document.body[i : j + 1] = [r'\begin_layout Standard',
+ document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
r'\begin_inset Note Note',
'status open',
'',
- r'\begin_layout Standard',
+ r'\begin_layout %s' % document.default_layout,
'Separate Environment',
r'\end_layout',
'',
r'\end_layout'
]
+
def convert_arabic (document):
if document.language == "arabic":
document.language = "arabic_arabtex"
# change the language name
document.body[i] = '\lang arabic_arabtex'
i = i + 1
-
+
+
def revert_arabic (document):
if document.language == "arabic_arabtex":
document.language = "arabic"
document.body[i] = '\lang arabic'
i = i + 1
-def revert_unicode(document):
- '''Transform unicode symbols according to the unicode list.
-Preamble flags are not implemented.
-Combination characters are currently ignored.
-Forced output is currently not enforced'''
- pathname = os.path.dirname(sys.argv[0])
- fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
- spec_chars = {}
- for line in fp.readlines():
- if line[0] != '#':
- line=line.replace(' "',' ') # remove all quotation marks with spaces before
- line=line.replace('" ',' ') # remove all quotation marks with spaces after
- line=line.replace(r'\"','"') # replace \" by " (for characters with diaresis)
- try:
- # flag1 and flag2 are preamble & flags
- # currently NOT implemented
- [ucs4,command,flag1,flag2] =line.split(None,3)
- spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
- except:
- pass
- fp.close()
- # Define strings to start and end ERT and math insets
- ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
- ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
- math_intro='\n\\begin_inset Formula $'
- math_outro='$\n\\end_inset\n'
- # Find unicode characters and replace them
- in_ert = 0 # flag set to 1 if in ERT inset
- in_math = 0 # flag set to 1 if in math inset
- insets = [] # list of active insets
- for i, current_line in enumerate(document.body):
- if current_line.find('\\begin_inset') > -1:
- # check which inset to start
- if current_line.find('\\begin_inset ERT') > -1:
- in_ert = 1
- insets.append('ert')
- elif current_line.find('\\begin_inset Formula') > -1:
- in_math = 1
- insets.append('math')
- else:
- insets.append('other')
- if current_line.find('\\end_inset') > -1:
- # check which inset to end
- try:
- cur_inset = insets.pop()
- if cur_inset == 'ert':
- in_ert = 0
- elif cur_inset == 'math':
- in_math = 0
- else:
- pass # end of other inset
- except:
- pass # inset list was empty (for some reason)
- current_line=''; # clear to have as container for modified line
- for j in range(len(document.body[i])):
- if spec_chars.has_key(document.body[i][j]):
- flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
- if flags.find('combining') > -1:
- command = ''
- else:
- command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
- if command[0:2] == '\\\\':
- if command[2:12]=='ensuremath':
- if in_ert == 1:
- # math in ERT
- command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
- command = command.replace('}', '$\n')
- elif in_math == 0:
- # add a math inset with the replacement character
- command = command.replace('\\\\ensuremath{\\', math_intro)
- command = command.replace('}', math_outro)
- else:
- # we are already in a math inset
- command = command.replace('\\\\ensuremath{\\', '')
- command = command.replace('}', '')
- else:
- if in_math == 1:
- # avoid putting an ERT in a math; instead put command as text
- command = command.replace('\\\\', '\mathrm{')
- command = command + '}'
- elif in_ert == 0:
- # add an ERT inset with the replacement character
- command = command.replace('\\\\', ert_intro)
- command = command + ert_outro
- else:
- command = command.replace('\\\\', '\n\\backslash\n')
- current_line = current_line + command
- else:
- current_line = current_line + document.body[i][j]
- document.body[i] = current_line
-
##
# Conversion hub
if __name__ == "__main__":
pass
-
-