+ " Set document encoding to UTF-8. "
+ convert_multiencoding(document, True)
+ document.encoding = "utf8"
+
+
def revert_utf8(document):
    """Switch the document from UTF-8 back to its header-defined encoding.

    A header without an ``\\inputencoding`` line gets one set to ``auto``;
    an explicit ``utf8`` setting is rewritten to ``auto``.  The resulting
    value is stored in ``document.inputencoding``, the encoding computed
    from it by get_encoding() is stored in ``document.encoding``, and
    finally convert_multiencoding() is run with ``False`` as its second
    argument.
    """
    key = "\\inputencoding"
    auto_line = key + " auto"
    header = document.header

    pos = find_token(header, key, 0)
    if pos == -1:
        header.append(auto_line)
    elif get_value(header, key, pos) == "utf8":
        header[pos] = auto_line

    document.inputencoding = get_value(header, key, 0)
    document.encoding = get_encoding(document.language,
                                     document.inputencoding,
                                     248,
                                     document.cjk_encoding)
    convert_multiencoding(document, False)
+
+
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and commands.

    The file ``unicodesymbols`` is looked up relative to the directory of
    the running script (``sys.argv[0]``): in the parent directory if the
    script lives in a directory called ``lyx2lyx``, in the script
    directory itself otherwise.

    Returns a dictionary that maps a unicode character to the list
    ``[command, flag1, flag2]``.  The three values are simply the second,
    third and fourth whitespace separated field of a line after the
    quotation marks have been removed; lines starting with ``#`` and
    lines that do not yield a code point plus three fields are skipped.

    Raises IOError (OSError) if the file can not be opened.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # str.strip('lyx2lyx') removes any of the characters l, y, x and 2
    # from both ends of the path, so it would also eat the end of an
    # unrelated directory name.  Only drop a real 'lyx2lyx' component.
    parent, leaf = os.path.split(pathname)
    if leaf == 'lyx2lyx':
        pathname = parent

    # unichr() only exists in Python 2; chr() is its Python 3 equivalent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    spec_chars = {}
    fp = open(os.path.join(pathname, 'unicodesymbols'))
    try:
        for line in fp:
            if line.startswith('#'):
                continue
            line = line.replace(' "', ' ')   # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')   # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
            fields = line.split(None, 3)
            if len(fields) != 4:
                continue
            # flag1 and flag2 are preamble and other flags
            ucs4, command, flag1, flag2 = fields
            try:
                # The code point is a numeric literal such as 0x00a0.
                # int(..., 0) understands the same literals as the eval()
                # used previously without executing file contents as code.
                character = to_char(int(ucs4, 0))
            except (ValueError, OverflowError):
                continue
            spec_chars[character] = [command, flag1, flag2]
    finally:
        # Close the file even if reading fails half way through.
        fp.close()
    return spec_chars
+
+
def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
    """Return body line `i` with all unencodable characters replaced.

    Every character of ``document.body[i]`` that can not be encoded in
    ``document.encoding`` is replaced by the command stored for it in
    `spec_chars`.  Depending on the innermost open inset the command is
    wrapped in a new ERT or Formula inset, or adapted so that it can be
    inserted into the ERT or Formula inset that is already open.
    Characters without an entry in `spec_chars` are replaced by
    `replacement_character`.

    Arguments:
      document -- supplies ``body``, ``encoding`` and ``default_layout``
      i -- index of the line in ``document.body``
      insets -- names of the insets open at line `i`, innermost last
      spec_chars -- maps a character to ``[command, flag1, flag2]``
      replacement_character -- text used when no command is known

    Returns the converted line as a single string.  It may contain
    newlines, so the caller has to split it before storing it in the
    body.  Side effect: if a combining character at the very start of the
    line absorbs the last character of the previous line, that character
    is removed from ``document.body[i-1]``.
    """
    # Define strings to start and end ERT and math insets
    ert_intro = '\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
    ert_outro = '\n\\end_layout\n\n\\end_inset\n'
    math_intro = '\n\\begin_inset Formula $'
    math_outro = '$\n\\end_inset'

    innermost = None
    if insets:
        innermost = insets[-1]

    mod_line = u''
    if i and not is_inset_line(document, i-1):
        last_char = document.body[i - 1][-1:]
    else:
        last_char = ''

    for character in document.body[i]:
        try:
            # Try to write the character
            character.encode(document.encoding)
        except (UnicodeError, LookupError):
            # Not representable (or the encoding is unknown): replace below.
            pass
        else:
            mod_line += character
            last_char = character
            continue

        if character not in spec_chars:
            # Replace with replacement string
            mod_line += replacement_character
            # The replacement string must never be taken for the base of a
            # following combining character (it would lose one of its
            # characters and the accent would move to an unrelated letter).
            last_char = ''
            continue

        # Try to replace with ERT/math inset
        command = spec_chars[character][0]  # the command to replace unicode
        flag1 = spec_chars[character][1]
        flag2 = spec_chars[character][2]
        if 'combining' in flag1 or 'combining' in flag2:
            # We have a character that should be combined with the previous
            command += '{' + last_char + '}'
            # Remove the last character. Ignore if it is whitespace or if
            # it was itself replaced by a command (last_char is '' then).
            if last_char.rstrip():
                if mod_line:
                    mod_line = mod_line[:-1]
                else:  # last_char belongs to the last line
                    document.body[i-1] = document.body[i-1][:-1]

        # Commands are stored with a doubled leading backslash.
        if command[0:2] == '\\\\':
            if command[2:12] == 'ensuremath':
                if innermost == "ERT":
                    # math in ERT
                    command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                    command = command.replace('}', '$\n')
                elif innermost != "Formula":
                    # add a math inset with the replacement character
                    command = command.replace('\\\\ensuremath{\\', math_intro)
                    command = command.replace('}', math_outro)
                else:
                    # we are already in a math inset
                    command = command.replace('\\\\ensuremath{\\', '')
                    command = command.replace('}', '')
            else:
                if innermost == "Formula":
                    # avoid putting an ERT in a math; instead put command as text
                    command = command.replace('\\\\', '\\mathrm{')
                    command = command + '}'
                elif innermost != "ERT":
                    # add an ERT inset with the replacement character
                    command = command.replace('\\\\', '\n\\backslash\n')
                    command = ert_intro + command + ert_outro
                else:
                    command = command.replace('\\\\', '\n\\backslash\n')
            last_char = ''  # indicate that the character should not be removed
        mod_line += command
    return mod_line
+
+
def revert_unicode(document):
    '''Transform unicode characters that can not be written using the
document encoding to commands according to the unicodesymbols
file. Characters that can not be replaced by commands are replaced by
a replacement string. Flags other than 'combining' are currently not
implemented.

The body is changed in place: a line that can not be encoded in
document.encoding is replaced by the (possibly several) lines returned
by revert_unicode_line().'''
    spec_chars = read_unicodesymbols()
    insets = []  # list of active insets, innermost last
    begin_token = '\\begin_inset'

    # Go through the document to capture all combining characters
    i = 0
    while i < len(document.body):
        line = document.body[i]
        # Check for insets
        pos = line.find(begin_token)
        if pos > -1:
            # The inset name is the first word after the token, wherever
            # the token is located in the line.
            words = line[pos + len(begin_token):].split()
            if words:
                insets.append(words[0])
            else:
                # Nameless inset: still push an entry so that the matching
                # \end_inset does not close the enclosing inset.
                insets.append('')
        if line.find('\\end_inset') > -1 and insets:
            # An unbalanced \end_inset is ignored instead of raising.
            del insets[-1]

        # Try to write the line
        try:
            line.encode(document.encoding)
        except (UnicodeError, LookupError):
            # Error, some character(s) in the line need to be replaced
            new_lines = revert_unicode_line(document, i, insets, spec_chars).split('\n')
            document.body[i:i+1] = new_lines
            # Skip the inserted lines; they are encodable already and their
            # insets are closed again within the replacement.
            i += len(new_lines)
        else:
            # All went well, the line can be written as it is
            i += 1
+
+
def revert_cs_label(document):
    """Remove status flag of charstyle label.

    For every ``\\begin_inset CharStyle`` line in the body, the following
    lines are searched for one starting with ``show_label``, which is
    deleted.  A warning is issued if a ``\\begin_layout`` line or the end
    of the body is reached first.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting with 'show_label'.
        # If it is not there, stop with a warning message.
        i = i + 1
        while i < len(body):
            if body[i][:10] == "show_label":
                del body[i]
                break
            if body[i][:13] == "\\begin_layout":
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        else:
            # Ran off the end of the body: the inset is truncated and
            # there is nothing left to search.
            document.warning("Malformed LyX document: Missing 'show_label'.")
            return

        i = i + 1
+
+
def convert_bibitem(document):
    r""" Convert
\bibitem [option]{argument}

to

\begin_inset LatexCommand bibitem
label "option"
key "argument"

\end_inset

This must be called after convert_commandparams.

The argument is the text between the last '{' and the last '}' of the
line, the option the text between the first '[' and the last ']' that
precede this '{'.  The 'label' line is only written if an option is
present.  A \bibitem line without a braced argument is reported with a
warning and left unchanged.
"""
    i = 0
    while True:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        line = document.body[i]
        open_brace = line.rfind('{')
        close_brace = line.rfind('}')
        if open_brace == -1 or close_brace < open_brace:
            # Slicing with the "not found" value -1 would produce garbage.
            document.warning("Malformed LyX document: Can't parse bibitem `%s'." % line)
            i = i + 1
            continue
        argument = line[open_brace + 1:close_brace]

        # Only look for the option in front of the argument, so that
        # brackets inside the key are not taken for an option.
        head = line[:open_brace]
        open_bracket = head.find('[')
        close_bracket = head.rfind(']')
        if open_bracket == -1 or close_bracket < open_bracket:
            option = None  # No optional argument found
        else:
            option = head[open_bracket + 1:close_bracket]

        lines = ['\\begin_inset LatexCommand bibitem']
        if option is not None:
            lines.append('label "%s"' % option.replace('"', '\\"'))
        lines.append('key "%s"' % argument.replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1
+
+
def _build_commandparams_info():
    """Return the table of parameter names of the LatexCommand insets.

    The result maps a command name to the list
    [name of option 1, name of option 2, name of the argument];
    an empty name marks a parameter the command does not have.
    """
    citations = (
        "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
        "citeyear", "citeyearpar",
        "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
        "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
        "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
        "citefield", "citetitle", "cite*",
    )
    references = ("eqref", "pageref", "prettyref", "ref", "vpageref", "vref")
    groups = (
        # (commands, (option1, option2, argument))
        (("bibitem",), ("label", "", "key")),
        (("bibtex",), ("options", "btprint", "bibfiles")),
        (citations, ("after", "before", "key")),
        (("hfill",), ("", "", "")),
        (("index", "printindex", "label"), ("", "", "name")),
        (references, ("name", "", "reference")),
        (("tableofcontents",), ("", "", "type")),
        (("htmlurl", "url"), ("name", "", "target")),
    )
    table = {}
    for commands, params in groups:
        for command in commands:
            # every command gets a list of its own
            table[command] = list(params)
    return table


commandparams_info = _build_commandparams_info()
+
+
def _scan_latex_command(command):
    r"""Split '\cmdname[opt1][opt2]{arg}' into its parts.

    Returns the tuple (name, option1, option2, argument); parts that are
    missing are returned as empty strings.  Nested brackets and braces as
    in \command[foo[bar]]{foo{bar}} are kept inside the respective part.
    The parser is taken from the original InsetCommandParams::scanCommand.
    """
    name = ""
    option1 = ""
    option2 = ""
    argument = ""
    state = "WS"
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    nestdepth = 0
    previous = ''
    for c in command:
        # The command name ends at the first blank, bracket or brace.
        if state == "CMDNAME" and c in (' ', '[', '{'):
            state = "WS"
        # A closing delimiter ends the part unless it closes a nested one.
        if ((state in ("OPTION", "SECOPTION") and c == ']') or
            (state == "CONTENT" and c == '}')):
            if nestdepth == 0:
                state = "WS"
            else:
                nestdepth = nestdepth - 1
        if ((state in ("OPTION", "SECOPTION") and c == '[') or
            (state == "CONTENT" and c == '{')):
            nestdepth = nestdepth + 1
        if state == "CMDNAME":
            name += c
        elif state == "OPTION":
            option1 += c
        elif state == "SECOPTION":
            option2 += c
        elif state == "CONTENT":
            argument += c
        elif state == "WS":
            if c == '\\':
                state = "CMDNAME"
            elif c == '[':
                # A bracket directly after a closing bracket starts the
                # second option, every other one the first option.
                if previous == ']':
                    state = "SECOPTION"
                else:
                    state = "OPTION"
                nestdepth = 0  # Just to be sure
            elif c == '{':
                state = "CONTENT"
                nestdepth = 0  # Just to be sure
        previous = c
    return name, option1, option2, argument


def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command; they are
 taken from commandparams_info.  Parameters for which the command has
 no name (and all parameters of a command missing from
 commandparams_info) are dropped with a warning.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            # The command may be continued on the lines up to \end_inset
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        name, option1, option2, argument = _scan_latex_command(command)

        # Now we have parsed the command, output the parameters.
        # A command that is not in the table has no valid parameter at
        # all; this avoids a KeyError for unknown commands.
        param_names = commandparams_info.get(name, ["", "", ""])
        lines = ["\\begin_inset LatexCommand %s" % name]
        parts = (("option", option1, param_names[0]),
                 ("second option", option2, param_names[1]),
                 ("argument", argument, param_names[2]))
        for kind, value, param_name in parts:
            if value == "":
                continue
            if param_name == "":
                document.warning("Ignoring invalid %s `%s' of command `%s'." % (kind, value, name))
            else:
                lines.append('%s "%s"' % (param_name, value.replace('\\', '\\\\').replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
+
+
def _unquote_commandparam(value):
    """Return the text of a quoted parameter value of a command inset.

    Exactly one pair of enclosing quotation marks is removed, then the
    escape sequences for '"' and for the backslash are undone.
    """
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        # str.strip('"') would also remove the quotation mark of an
        # escaped quote at the end of the value.
        value = value[1:-1]
    else:
        value = value.strip('"')
    return value.replace('\\"', '"').replace('\\\\', '\\')


def revert_commandparams(document):
    """Convert command insets with named parameters back to LaTeX syntax.

    An inset starting with '\\begin_inset LatexCommand cmdname' becomes
    '\\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg}' (a 'preview'
    line is kept); a bibitem inset becomes a plain
    '\\bibitem [opt1]{arg}' line.  The names of the parameters are looked
    up in commandparams_info.  Parameters of commands that are missing
    from this table are dropped with a warning, and insets without name
    or without end are reported and left unchanged.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        words = document.body[i].split()
        if len(words) < 3:
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i += 1
            continue
        name = words[2]
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Replacing body[i:j+1] would insert lines in front of the inset.
            document.warning("Malformed LyX document: Can't find end of command inset %s." % name)
            i += 1
            continue
        param_names = commandparams_info.get(name)
        if param_names is None:
            document.warning("Unknown command inset %s: parameters are dropped." % name)
            param_names = ["", "", ""]

        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif param_names[0] != "" and pname == param_names[0]:
                    option1 = _unquote_commandparam(pvalue)
                elif param_names[1] != "" and pname == param_names[1]:
                    option2 = _unquote_commandparam(pvalue)
                elif param_names[2] != "" and pname == param_names[2]:
                    argument = _unquote_commandparam(pvalue)
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))

        if name == "bibitem":
            # bibitem is no inset in the old format
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            # A second option requires the first one, even if it is empty.
            if option2 != "":
                options = "[%s][%s]" % (option1, option2)
            elif option1 != "":
                options = "[%s]" % option1
            else:
                options = ""
            lines = ["\\begin_inset LatexCommand \\%s%s{%s}" % (name, options, argument)]
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        # Continue directly behind the replaced lines without skipping
        # the line that follows them.
        i += len(lines)
+
+
def revert_nomenclature(document):
    """Convert nomenclature entry to ERT.

    Every '\\begin_inset LatexCommand nomenclature' inset is replaced by
    an ERT inset containing \\nomenclature[prefix]{symbol}{description}
    (the optional part is left out if there is no prefix).  If at least
    one inset was found and the preamble does not load the nomencl
    package yet, the \\usepackage and \\makenomenclature lines are
    appended to the preamble.
    """
    regex = re.compile(r'(\S+)\s+(.+)')

    def unquote(value):
        # Remove exactly one pair of enclosing quotation marks:
        # str.strip('"') would also eat an escaped quote at the end.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        else:
            value = value.strip('"')
        return value.replace('\\"', '"')

    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this check the ERT would be inserted in front of
            # the inset again and again.
            document.warning("Malformed LyX document: Can't find end of nomenclature inset.")
            i = i + 1
            continue
        params = {"symbol": "", "description": "", "prefix": ""}
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                # Other parameters (e.g. preview) can not be kept in ERT.
                if match.group(1) in params:
                    params[match.group(1)] = unquote(match.group(2))
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if params["prefix"] == "":
            command = 'nomenclature{%s}{%s}' % (params["symbol"], params["description"])
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (params["prefix"], params["symbol"], params["description"])
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 11  # skip the 11 lines of the ERT inset
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
+
+
def revert_printnomenclature(document):
    """Convert printnomenclature to ERT.

    Every '\\begin_inset LatexCommand printnomenclature' inset is
    replaced by an ERT inset containing \\printnomenclature, with the
    label width as optional argument if one is given.  If at least one
    inset was found and the preamble does not load the nomencl package
    yet, the \\usepackage and \\makenomenclature lines are appended to
    the preamble.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # Without this check the ERT would be inserted in front of
            # the inset again and again.
            document.warning("Malformed LyX document: Can't find end of printnomenclature inset.")
            i = i + 1
            continue
        labelwidth = ""
        for k in range(i + 1, j):
            match = regex.match(document.body[k])
            if match:
                # Other parameters (e.g. preview) can not be kept in ERT.
                if match.group(1) == "labelwidth":
                    value = match.group(2)
                    # Remove exactly one pair of enclosing quotation
                    # marks: str.strip('"') would also eat an escaped
                    # quote at the end of the value.
                    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                        value = value[1:-1]
                    else:
                        value = value.strip('"')
                    labelwidth = value.replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # The list is printed by \printnomenclature; \nomenclature (which
        # was written here before) defines a single entry and needs two
        # mandatory arguments.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 11  # skip the 11 lines of the ERT inset
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
+
+
def convert_esint(document):
    """Insert '\\use_esint 0' into the header, in front of \\cite_engine.

    A header without a \\cite_engine line is reported with a warning and
    left unchanged.
    """
    pos = find_token(document.header, "\\cite_engine", 0)
    if pos != -1:
        # Possible values: 0 is off, 1 is auto, 2 is on.
        document.header.insert(pos, '\\use_esint 0')
    else:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
+
+
def revert_esint(document):
    """Remove \\use_esint setting from header.

    If the setting was 2 (on), '\\usepackage{esint}' is appended to the
    preamble instead.  A header without the setting is reported with a
    warning.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    fields = document.header[i].split()
    use_esint = ""
    if len(fields) > 1:
        use_esint = fields[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # The value is read from the header as text, so it has to be compared
    # with a string: a comparison with the number 2 is never true.
    if use_esint == "2":
        document.preamble.append('\\usepackage{esint}')
+
+
def revert_clearpage(document):
    """Replace every \\clearpage line of the body by an ERT inset.

    The inset uses the default layout of the document and contains the
    command itself ('\\backslash' followed by 'clearpage').
    """
    pos = find_token(document.body, "\\clearpage", 0)
    while pos != -1:
        ert = ['\\begin_inset ERT', 'status collapsed', '']
        ert.append('\\begin_layout %s' % document.default_layout)
        ert.extend(['', '', '\\backslash', 'clearpage'])
        ert.extend(['\\end_layout', '', '\\end_inset'])
        document.body[pos:pos+1] = ert
        # go on with the line after the start of the new inset
        pos = find_token(document.body, "\\clearpage", pos + 1)
+
+
def revert_cleardoublepage(document):
    """Replace every \\cleardoublepage line of the body by an ERT inset.

    The inset uses the default layout of the document and contains the
    command itself ('\\backslash' followed by 'cleardoublepage').
    """
    pos = find_token(document.body, "\\cleardoublepage", 0)
    while pos != -1:
        ert = ['\\begin_inset ERT', 'status collapsed', '']
        ert.append('\\begin_layout %s' % document.default_layout)
        ert.extend(['', '', '\\backslash', 'cleardoublepage'])
        ert.extend(['\\end_layout', '', '\\end_inset'])
        document.body[pos:pos+1] = ert
        # go on with the line after the start of the new inset
        pos = find_token(document.body, "\\cleardoublepage", pos + 1)
+
+
def convert_lyxline(document):
    r""" remove fontsize commands for \lyxline

    A '\size <fontsize>' line of the body is deleted if the next
    \lyxline line found from there on is located exactly two lines
    below it.  All other '\size' lines are left alone.
    """
    # The problem is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definition does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for fontsize in fontsizes:
        token = "\\size " + fontsize
        i = 0
        while True:
            i = find_token(document.body, token, i)
            if i == -1:
                break
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if find_token(document.body, "\\lyxline", i) == i + 2:
                del document.body[i]
            else:
                # A size change that does not belong to a \lyxline must not
                # end the search: there may be more of them further down.
                i = i + 1
+
+
+def revert_encodings(document):
+ " Set new encodings to auto. "
+ encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
+ "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
+ "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]