from parser_tools import find_token, find_end_of, find_tokens, \
find_end_of_inset, find_end_of_layout, get_value, get_value_string
+
+from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
+ put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
+ revert_font_attrs, revert_layout_command, hex2ratio
####################################################################
# Private helper functions
return True
-# Note that text can be either a list of lines or a single line.
-def add_to_preamble(document, text):
- """ Add text to the preamble if it is not already there.
- Only the first line is checked!"""
-
- if not type(text) is list:
- # split on \n just in case
- # it'll give us the one element list we want
- # if there's no \n, too
- text = text.split('\n')
-
- if find_token(document.preamble, text[0], 0) != -1:
- return
-
- document.preamble.extend(text)
-
-
-# Note that text can be either a list of lines or a single line.
-# It should really be a list.
-def insert_to_preamble(index, document, text):
- """ Insert text to the preamble at a given line"""
-
- if not type(text) is list:
- # split on \n just in case
- # it'll give us the one element list we want
- # if there's no \n, too
- text = text.split('\n')
-
- document.preamble[index:index] = text
-
-
-def read_unicodesymbols():
- " Read the unicodesymbols list of unicode characters and corresponding commands."
- pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
- fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
- spec_chars = []
- # Two backslashes, followed by some non-word character, and then a character
- # in brackets. The idea is to check for constructs like: \"{u}, which is how
- # they are written in the unicodesymbols file; but they can also be written
- # as: \"u or even \" u.
- r = re.compile(r'\\\\(\W)\{(\w)\}')
- for line in fp.readlines():
- if line[0] != '#' and line.strip() != "":
- line=line.replace(' "',' ') # remove all quotation marks with spaces before
- line=line.replace('" ',' ') # remove all quotation marks with spaces after
- line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
- try:
- [ucs4,command,dead] = line.split(None,2)
- if command[0:1] != "\\":
- continue
- spec_chars.append([command, unichr(eval(ucs4))])
- except:
- continue
- m = r.match(command)
- if m != None:
- command = "\\\\"
- # If the character is a double-quote, then we need to escape it, too,
- # since it is done that way in the LyX file.
- if m.group(1) == "\"":
- command += "\\"
- commandbl = command
- command += m.group(1) + m.group(2)
- commandbl += m.group(1) + ' ' + m.group(2)
- spec_chars.append([command, unichr(eval(ucs4))])
- spec_chars.append([commandbl, unichr(eval(ucs4))])
- fp.close()
- return spec_chars
-
-
-unicode_reps = read_unicodesymbols()
-
-
# DO NOT USE THIS ROUTINE ANY MORE. Better yet, replace the uses that
# have been made of it with uses of put_cmd_in_ert.
def old_put_cmd_in_ert(string):
return string
-# This routine wraps some content in an ERT inset.
-#
-# NOTE: The function accepts either a single string or a LIST of strings as
-# argument. But it returns a LIST of strings, split on \n, so that it does
-# not have embedded newlines.
-#
-# This is how lyx2lyx represents a LyX document: as a list of strings,
-# each representing a line of a LyX file. Embedded newlines confuse
-# lyx2lyx very much.
-#
-# A call to this routine will often go something like this:
-# i = find_token('\\begin_inset FunkyInset', ...)
-# ...
-# j = find_end_of_inset(document.body, i)
-# content = ...extract content from insets
-# # that could be as simple as:
-# # content = lyx2latex(document[i:j + 1])
-# ert = put_cmd_in_ert(content)
-# document.body[i:j] = ert
-# Now, before we continue, we need to reset i appropriately. Normally,
-# this would be:
-# i += len(ert)
-# That puts us right after the ERT we just inserted.
-#
-def put_cmd_in_ert(arg):
- ret = ["\\begin_inset ERT", "status collapsed", "\\begin_layout Plain Layout", ""]
- # Despite the warnings just given, it will be faster for us to work
- # with a single string internally. That way, we only go through the
- # unicode_reps loop once.
- if type(arg) is list:
- s = "\n".join(arg)
- else:
- s = arg
- for rep in unicode_reps:
- s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
- s = s.replace('\\', "\\backslash\n")
- ret += s.splitlines()
- ret += ["\\end_layout", "\\end_inset"]
- return ret
-
-
-def lyx2latex(document, lines):
- 'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
- # clean up multiline stuff
- content = ""
- ert_end = 0
- note_end = 0
- hspace = ""
-
- for curline in range(len(lines)):
- line = lines[curline]
- if line.startswith("\\begin_inset Note Note"):
- # We want to skip LyX notes, so remember where the inset ends
- note_end = find_end_of_inset(lines, curline + 1)
- continue
- elif note_end >= curline:
- # Skip LyX notes
- continue
- elif line.startswith("\\begin_inset ERT"):
- # We don't want to replace things inside ERT, so figure out
- # where the end of the inset is.
- ert_end = find_end_of_inset(lines, curline + 1)
- continue
- elif line.startswith("\\begin_inset Formula"):
- line = line[20:]
- elif line.startswith("\\begin_inset Quotes"):
- # For now, we do a very basic reversion. Someone who understands
- # quotes is welcome to fix it up.
- qtype = line[20:].strip()
- # lang = qtype[0]
- side = qtype[1]
- dbls = qtype[2]
- if side == "l":
- if dbls == "d":
- line = "``"
- else:
- line = "`"
- else:
- if dbls == "d":
- line = "''"
- else:
- line = "'"
- elif line.startswith("\\begin_inset space"):
- line = line[18:].strip()
- if line.startswith("\\hspace"):
- # Account for both \hspace and \hspace*
- hspace = line[:-2]
- continue
- elif line == "\\space{}":
- line = "\\ "
- elif line == "\\thinspace{}":
- line = "\\,"
- elif hspace != "":
- # The LyX length is in line[8:], after the \length keyword
- length = latex_length(line[8:])[1]
- line = hspace + "{" + length + "}"
- hspace = ""
- elif line.isspace() or \
- line.startswith("\\begin_layout") or \
- line.startswith("\\end_layout") or \
- line.startswith("\\begin_inset") or \
- line.startswith("\\end_inset") or \
- line.startswith("\\lang") or \
- line.strip() == "status collapsed" or \
- line.strip() == "status open":
- #skip all that stuff
- continue
-
- # this needs to be added to the preamble because of cases like
- # \textmu, \textbackslash, etc.
- add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
- '\\@ifundefined{textmu}',
- ' {\\usepackage{textcomp}}{}'])
- # a lossless reversion is not possible
- # try at least to handle some common insets and settings
- if ert_end >= curline:
- line = line.replace(r'\backslash', '\\')
- else:
- # No need to add "{}" after single-nonletter macros
- line = line.replace('&', '\\&')
- line = line.replace('#', '\\#')
- line = line.replace('^', '\\textasciicircum{}')
- line = line.replace('%', '\\%')
- line = line.replace('_', '\\_')
- line = line.replace('$', '\\$')
-
- # Do the LyX text --> LaTeX conversion
- for rep in unicode_reps:
- line = line.replace(rep[1], rep[0] + "{}")
- line = line.replace(r'\backslash', r'\textbackslash{}')
- line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
- line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
- line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
- line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
- line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
- line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
- line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
- line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
- line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
- content += line
- return content
-
-
-def latex_length(slen):
- '''
- Convert lengths to their LaTeX representation. Returns (bool, length),
- where the bool tells us if it was a percentage, and the length is the
- LaTeX representation.
- '''
- i = 0
- percent = False
- # the slen has the form
- # ValueUnit+ValueUnit-ValueUnit or
- # ValueUnit+-ValueUnit
- # the + and - (glue lengths) are optional
- # the + always precedes the -
-
- # Convert relative lengths to LaTeX units
- units = {"text%":"\\textwidth", "col%":"\\columnwidth",
- "page%":"\\paperwidth", "line%":"\\linewidth",
- "theight%":"\\textheight", "pheight%":"\\paperheight"}
- for unit in units.keys():
- i = slen.find(unit)
- if i == -1:
- continue
- percent = True
- minus = slen.rfind("-", 1, i)
- plus = slen.rfind("+", 0, i)
- latex_unit = units[unit]
- if plus == -1 and minus == -1:
- value = slen[:i]
- value = str(float(value)/100)
- end = slen[i + len(unit):]
- slen = value + latex_unit + end
- if plus > minus:
- value = slen[plus + 1:i]
- value = str(float(value)/100)
- begin = slen[:plus + 1]
- end = slen[i+len(unit):]
- slen = begin + value + latex_unit + end
- if plus < minus:
- value = slen[minus + 1:i]
- value = str(float(value)/100)
- begin = slen[:minus + 1]
- slen = begin + value + latex_unit
-
- # replace + and -, but only if the - is not the first character
- slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
- # handle the case where "+-1mm" was used, because LaTeX only understands
- # "plus 1mm minus 1mm"
- if slen.find("plus minus"):
- lastvaluepos = slen.rfind(" ")
- lastvalue = slen[lastvaluepos:]
- slen = slen.replace(" ", lastvalue + " ")
- return (percent, slen)
-
-
-def revert_flex_inset(document, name, LaTeXname, position):
- " Convert flex insets to TeX code "
- i = position
- while True:
- i = find_token(document.body, '\\begin_inset Flex ' + name, i)
- if i == -1:
- return
- z = find_end_of_inset(document.body, i)
- if z == -1:
- document.warning("Malformed LyX document: Can't find end of Flex " + name + " inset.")
- return
- # remove the \end_inset
- document.body[z - 2:z + 1] = put_cmd_in_ert("}")
- # we need to reset character layouts if necessary
- j = find_token(document.body, '\\emph on', i, z)
- k = find_token(document.body, '\\noun on', i, z)
- l = find_token(document.body, '\\series', i, z)
- m = find_token(document.body, '\\family', i, z)
- n = find_token(document.body, '\\shape', i, z)
- o = find_token(document.body, '\\color', i, z)
- p = find_token(document.body, '\\size', i, z)
- q = find_token(document.body, '\\bar under', i, z)
- r = find_token(document.body, '\\uuline on', i, z)
- s = find_token(document.body, '\\uwave on', i, z)
- t = find_token(document.body, '\\strikeout on', i, z)
- if j != -1:
- document.body.insert(z - 2, "\\emph default")
- if k != -1:
- document.body.insert(z - 2, "\\noun default")
- if l != -1:
- document.body.insert(z - 2, "\\series default")
- if m != -1:
- document.body.insert(z - 2, "\\family default")
- if n != -1:
- document.body.insert(z - 2, "\\shape default")
- if o != -1:
- document.body.insert(z - 2, "\\color inherit")
- if p != -1:
- document.body.insert(z - 2, "\\size default")
- if q != -1:
- document.body.insert(z - 2, "\\bar default")
- if r != -1:
- document.body.insert(z - 2, "\\uuline default")
- if s != -1:
- document.body.insert(z - 2, "\\uwave default")
- if t != -1:
- document.body.insert(z - 2, "\\strikeout default")
- document.body[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
- i += 1
-
-
-def revert_font_attrs(document, name, LaTeXname):
- " Reverts font changes to TeX code "
- i = 0
- changed = False
- while True:
- i = find_token(document.body, name + ' on', i)
- if i == -1:
- return changed
- j = find_token(document.body, name + ' default', i)
- k = find_token(document.body, name + ' on', i + 1)
- # if there is no default set, the style ends with the layout
- # assure hereby that we found the correct layout end
- if j != -1 and (j < k or k == -1):
- document.body[j:j + 1] = put_cmd_in_ert("}")
- else:
- j = find_token(document.body, '\\end_layout', i)
- document.body[j:j] = put_cmd_in_ert("}")
- document.body[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
- changed = True
- i += 1
-
-
-def revert_layout_command(document, name, LaTeXname, position):
- " Reverts a command from a layout to TeX code "
- i = position
- while True:
- i = find_token(document.body, '\\begin_layout ' + name, i)
- if i == -1:
- return
- k = -1
- # find the next layout
- j = i + 1
- while k == -1:
- j = find_token(document.body, '\\begin_layout', j)
- l = len(document.body)
- # if nothing was found it was the last layout of the document
- if j == -1:
- document.body[l - 4:l - 4] = put_cmd_in_ert("}")
- k = 0
- # exclude plain layout because this can be TeX code or another inset
- elif document.body[j] != '\\begin_layout Plain Layout':
- document.body[j - 2:j - 2] = put_cmd_in_ert("}")
- k = 0
- else:
- j += 1
- document.body[i] = '\\begin_layout Standard'
- document.body[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
- i += 1
-
-
-def hex2ratio(s):
- val = string.atoi(s, 16)
- if val != 0:
- val += 1
- return str(val / 256.0)
-
-
###############################################################################
###
### Conversion and reversion routines