def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    # Each inset parameter line looks like `name "value"'.
    entry_re = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""   # recognized but not re-emitted in the ERT
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            m = entry_re.match(document.body[k])
            if not m:
                if document.body[k].strip() != "":
                    document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
                continue
            name = m.group(1)
            value = m.group(2)
            if name == "preview":
                preview_line = document.body[k]
            elif name == "symbol":
                symbol = value.strip('"').replace('\\"', '"')
            elif name == "description":
                description = value.strip('"').replace('\\"', '"')
            elif name == "prefix":
                prefix = value.strip('"').replace('\\"', '"')
        # Build the raw LaTeX command, with the optional prefix argument.
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        # Skip past the 11 lines just inserted.
        i += 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
+
+
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    # Each inset parameter line looks like `name "value"'.
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""   # recognized but not re-emitted in the ERT
        labelwidth = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # Fixed: this must revert to \printnomenclature, not \nomenclature --
        # \nomenclature would add a list *entry* instead of printing the list.
        if labelwidth == "":
            command = 'printnomenclature{}'
        else:
            command = 'printnomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        # Skip past the 11 lines just inserted.
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
+
+
def convert_esint(document):
    " Add \\use_esint setting to header. "
    # Insert the new setting just before \cite_engine, which every
    # well-formed header contains.
    pos = find_token(document.header, "\\cite_engine", 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(pos, '\\use_esint 0')
+
+
def revert_esint(document):
    " Remove \\use_esint setting from header. "
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    # The header value is a string, e.g. '\\use_esint 2' -> '2'.
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # Fixed: the old test `use_esint == 2' compared a string with an int and
    # was therefore always false, so the esint package was never added.
    if use_esint == "2":
        document.preamble.append('\\usepackage{esint}')
+
+
def revert_clearpage(document):
    " clearpage -> ERT "
    pos = 0
    while 1:
        pos = find_token(document.body, "\\clearpage", pos)
        if pos == -1:
            break
        # Replace the single \clearpage line with an ERT inset holding the
        # raw LaTeX command.
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               'clearpage',
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[pos:pos+1] = ert
        pos += 1
+
+
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    pos = 0
    while 1:
        pos = find_token(document.body, "\\cleardoublepage", pos)
        if pos == -1:
            break
        # Replace the single \cleardoublepage line with an ERT inset holding
        # the raw LaTeX command.
        ert = ['\\begin_inset ERT',
               'status collapsed',
               '',
               '\\begin_layout %s' % document.default_layout,
               '',
               '',
               '\\backslash',
               'cleardoublepage',
               '\\end_layout',
               '',
               '\\end_inset']
        document.body[pos:pos+1] = ert
        pos += 1
+
+
def convert_lyxline(document):
    r""" remove fontsize commands for \lyxline """
    # The problem is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definition does this, so that
    # imported \lyxlines would get a different line thickness. The eventual
    # fontsize command before \lyxline is therefore removed to get the same
    # output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    # Scan the body once per font size.
    for n in range(0, len(fontsizes)):
        i = 0
        k = 0
        while i < len(document.body):
            # i: next occurrence of this \size command; k: next \lyxline
            # at or after it.
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline",i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                # Drop only the fontsize line; the \lyxline itself stays.
                document.body[i:i+1] = []
            else:
                break
            i = i + 1
+
+
def revert_encodings(document):
    " Set new encodings to auto. "
    # Encodings introduced after the target format; they must be downgraded
    # to 'auto'.
    new_encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                     "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                     "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", pos) in new_encodings:
        document.header[pos] = "\\inputencoding auto"
    # Remember the (possibly changed) encoding for later conversion steps.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
def convert_caption(document):
    " Convert caption layouts to caption insets. "
    pos = 0
    while 1:
        pos = find_token(document.body, "\\begin_layout Caption", pos)
        if pos == -1:
            return
        end = find_end_of_layout(document.body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # First close the inner paragraph and the new inset at the old end of
        # the layout (done before touching `pos' so `end' stays valid), then
        # replace the Caption layout line with a wrapper paragraph that opens
        # the caption inset.
        document.body[end:end] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[pos:pos+1] = ["\\begin_layout %s" % document.default_layout,
                                    "\\begin_inset Caption", "",
                                    "\\begin_layout %s" % document.default_layout]
        pos += 1
+
+
def revert_caption(document):
    """Convert caption insets to caption layouts.

    This assumes that the text class has a caption style.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        layout_line = document.body[layout_before]
        # Only blank lines between the \begin_layout and the inset means the
        # inset is the first thing in the paragraph.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # Only blank lines between \end_inset and \end_layout means the inset
        # is the last thing in the paragraph.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # Deletions go back-to-front (k, then j, then the blank after i) so
        # the smaller indices stay valid.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
+
+
+# Accents of InsetLaTeXAccent
+accent_map = {
+ "`" : u'\u0300', # grave
+ "'" : u'\u0301', # acute
+ "^" : u'\u0302', # circumflex
+ "~" : u'\u0303', # tilde
+ "=" : u'\u0304', # macron
+ "u" : u'\u0306', # breve
+ "." : u'\u0307', # dot above
+ "\"": u'\u0308', # diaresis
+ "r" : u'\u030a', # ring above
+ "H" : u'\u030b', # double acute
+ "v" : u'\u030c', # caron
+ "b" : u'\u0320', # minus sign below
+ "d" : u'\u0323', # dot below
+ "c" : u'\u0327', # cedilla
+ "k" : u'\u0328', # ogonek
+ "t" : u'\u0361' # tie. This is special: It spans two characters, but
+ # only one is given as argument, so we don't need to
+ # treat it differently.
+}
+
+
+# special accents of InsetLaTeXAccent without argument
+special_accent_map = {
+ 'i' : u'\u0131', # dotless i
+ 'j' : u'\u0237', # dotless j
+ 'l' : u'\u0142', # l with stroke
+ 'L' : u'\u0141' # L with stroke
+}
+
+
+# special accent arguments of InsetLaTeXAccent
+accented_map = {
+ '\\i' : u'\u0131', # dotless i
+ '\\j' : u'\u0237' # dotless j
+}
+
+
+def _convert_accent(accent, accented_char):
+ type = accent
+ char = accented_char
+ if char == '':
+ if type in special_accent_map:
+ return special_accent_map[type]
+ # a missing char is treated as space by LyX
+ char = ' '
+ elif type == 'q' and char in ['t', 'd', 'l', 'L']:
+ # Special caron, only used with t, d, l and L.
+ # It is not in the map because we convert it to the same unicode
+ # character as the normal caron: \q{} is only defined if babel with
+ # the czech or slovak language is used, and the normal caron
+ # produces the correct output if the T1 font encoding is used.
+ # For the same reason we never convert to \q{} in the other direction.
+ type = 'v'
+ elif char in accented_map:
+ char = accented_map[char]
+ elif (len(char) > 1):
+ # We can only convert accents on a single char
+ return ''
+ a = accent_map.get(type)
+ if a:
+ return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+ return ''
+
+
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Convert backslashes and '\n' in *ert* into valid ERT code.

    The converted text is appended to body[i]; the (possibly advanced)
    line index i is returned."""

    for ch in ert:
        if ch == '\\':
            # A backslash becomes its own \backslash line.
            body[i] += '\\backslash '
            i += 1
            body.insert(i, '')
        elif ch == '\n':
            # A newline closes the current paragraph and opens a new one.
            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
            i += 4
        else:
            body[i] += ch
    return i
+
+
def convert_accent(document):
    """Convert InsetLaTeXAccent objects to unicode characters where
    possible, falling back to ERT for unknown accents."""
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            accented_char = match.group(1)
            converted = _convert_accent(accent, accented_char)
            if converted == '':
                # Normalize contents for the ERT fallback below.
                # Fixed: a stray trailing comma used to turn this into a
                # 1-tuple, which only worked by accident in the later
                # `%' formattings.
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # Successful conversion: replace the whole inset in place.
                document.body[i] = '%s%s' % (prefix, converted)
                i += 1
                continue
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        # Fall back to an ERT inset containing the raw LaTeX accent command.
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
+
+
def revert_accent(document):
    """Replace accented characters that cannot be represented in the
    document encoding with InsetLaTeXAccent constructs."""
    # Build the inverse lookup tables of the three accent maps above.
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # probably form a base character + combining accent pair, so
            # pull the first word of the next line up into this one.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Decompose so that every accented character becomes base char +
    # combining mark. (The comment said NFD; the code actually uses NFKD,
    # i.e. compatibility decomposition.)
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
        except TypeError:
            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    for i in range(len(document.body)):

        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                # Nested layouts inherit the current encoding.
                encoding_stack.append(encoding_stack[-1])
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    # If the char encodes cleanly, leave it alone.
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    # NOTE(review): assigning a string to a list slice inserts
                    # one list element per *character*, not one line -- verify
                    # this is intended (a one-element list may have been meant).
                    if j < len(document.body[i]) - 1:
                        document.body[i+1:i+1] = document.body[i][j+1:]
                    # Delete the accented character
                    # NOTE(review): [:j-1] also drops the character before
                    # position j; [:j] would keep it -- confirm intent.
                    if j > 0:
                        document.body[i] = document.body[i][:j-1]
                    else:
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                # document.body[i][j] is a combining accent; the base char
                # precedes it at j-1.
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    # Re-compose and test whether the target encoding has it.
                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    # NOTE(review): same string-into-slice concern as above.
                    if j < len(document.body[i]) - 1:
                        document.body[i+1:i+1] = document.body[i][j+1:]
                    # Delete the accented characters
                    # NOTE(review): [:j-2] drops one char more than the
                    # base+accent pair at j-1/j; [:j-1] would be the exact
                    # pair -- confirm intent.
                    if j > 1:
                        document.body[i] = document.body[i][:j-2]
                    else:
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    break
    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(numberoflines):
        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+
+
def normalize_font_whitespace(document):
    """Move whitespace out of font-change sequences.

    Before format 259 the font changes were ignored if a whitespace was
    the first or last character in the sequence; this function transfers
    the whitespace outside."""

    if document.backend != "latex":
        return

    lines = document.body

    # Supported font properties and their default (reset) values.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    # Currently open (non-default) font changes: property -> value.
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            # NOTE(review): at i == 0 `lines[i-1]' wraps around to the last
            # body line; presumably a \begin_layout always precedes a font
            # change so this cannot happen -- confirm.

            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                # Skip over the lines just inserted.
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                # NOTE(review): assumes a font-change line is never the very
                # last body line (otherwise lines[i+1] / lines[i+2] raise
                # IndexError) -- confirm.
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = lines[i+1][1:]
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                # Skip over the lines just inserted.
                i = i + len(added_lines)

        i = i + 1
+
+
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", pos) == "utf8x":
        document.header[pos] = "\\inputencoding utf8"
    # Remember the (possibly changed) encoding for later conversion steps.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+