def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and corresponding commands.

    The file 'unicodesymbols' is looked up in the parent of the directory
    that holds the running script (sys.argv[0]) when that directory is
    named 'lyx2lyx', and in the script directory itself otherwise.

    Returns a list of [command, character] pairs. Commands keep the doubled
    backslash used in the unicodesymbols file. For accent commands written
    as \\\\X{c} (X a non-word character, c a word character) two more
    spellings are added: without the braces, and with a blank instead of
    the braces.

    Raises whatever open() raises when the file cannot be opened. Lines
    that cannot be parsed are skipped silently.
    """
    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
    # str.strip('lyx2lyx') would remove any of the characters l, y, x and 2
    # from *both* ends of the path; what is wanted is to drop the final
    # directory component when it is the lyx2lyx directory.
    head, tail = os.path.split(script_dir)
    if tail == 'lyx2lyx':
        base_dir = head
    else:
        base_dir = script_dir

    try:
        to_char = unichr
    except NameError:
        # No unichr builtin (Python 3): chr() does the same job there.
        to_char = chr

    spec_chars = []
    # Two backslashes, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written
    # as: \"u or even \" u.
    accent_re = re.compile(r'\\\\(\W)\{(\w)\}')

    fp = open(os.path.join(base_dir, 'unicodesymbols'))
    try:
        for line in fp:
            if line[0] == '#' or line.strip() == "":
                continue
            line = line.replace(' "', ' ')   # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')   # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)

            # A usable entry has a code point, a command, and something after.
            fields = line.split(None, 2)
            if len(fields) != 3:
                continue
            ucs4 = fields[0]
            command = fields[1]
            if command[0:1] != "\\":
                continue
            # The code point is an integer literal such as 0x00e4. It is
            # parsed with int() (base 0 honours the 0x prefix) rather than
            # eval(), so file content is never executed as code.
            try:
                char = to_char(int(ucs4, 0))
            except (ValueError, OverflowError):
                continue
            spec_chars.append([command, char])

            m = accent_re.match(command)
            if m is not None:
                command = "\\\\"
                # If the character is a double-quote, then we need to escape it, too,
                # since it is done that way in the LyX file.
                if m.group(1) == "\"":
                    command += "\\"
                commandbl = command
                command += m.group(1) + m.group(2)
                commandbl += m.group(1) + ' ' + m.group(2)
                spec_chars.append([command, char])
                spec_chars.append([commandbl, char])
    finally:
        # Close the file even if processing a line raises unexpectedly.
        fp.close()
    return spec_chars
+
+
def extract_argument(line):
    """Extract a LaTeX argument from the start of line.

    Leading whitespace is allowed in front of the argument. The argument
    must open with '{' or '['; nested delimiters of the same kind are
    balanced, and a character preceded by a backslash is never counted as
    a delimiter. Only the text up to the first newline is examined.

    Returns a tuple (arg, rest):
      (None, "")    if line is empty or None,
      (None, line)  if line does not start with an argument,
      (line, "")    if the opening delimiter is never closed,
      (arg, rest)   otherwise, where arg is everything up to and including
                    the closing delimiter (leading whitespace included) and
                    rest is the remainder of line.
    """
    if not line:
        return (None, "")

    # Always matches: both groups may be empty. '.' stops at a newline.
    n = re.match(r"(\s*)(.*)", line)
    whitespace = n.group(1)
    stuff = n.group(2)
    brace = stuff[:1]
    if brace != "[" and brace != "{":
        return (None, line)

    term = "}"
    if brace == "[":
        term = "]"

    # Find the closing delimiter. pos counts the characters that follow the
    # opening delimiter and precede the matching closing one.
    pos = 0
    num = 1
    skip = False
    for c in stuff[1:]:
        if skip:
            # character escaped by the preceding backslash
            skip = False
        elif c == "\\":
            skip = True
        elif c == brace:
            num += 1
        elif c == term:
            num -= 1
            if num == 0:
                break
        pos += 1
    if num != 0:
        # We never found the matching brace
        # So, to be on the safe side, let's just return everything
        # which will then get wrapped as ERT
        return (line, "")

    # Leading whitespace + opening delimiter + content + closing delimiter.
    end = len(whitespace) + pos + 2
    return (line[:end], line[end:])
+
+
def latex2ert(line, isindex):
    """Convert the LaTeX commands contained in line into ERT.

    Every command, together with the arguments extract_argument() finds
    directly behind it, is passed to put_cmd_in_ert() and placed on a line
    of its own. Braces left in the remaining text (and, when isindex is
    true, '|' characters) are handed to wrap_into_ert().

    An empty or None line is returned unchanged. Otherwise the result is a
    string which may well span several lines.
    """
    if not line:
        return line

    ## FIXME Escaped \ ??
    # Matches a LaTeX command, i.e. "\alPhaStuFF" or "\X" with X being any
    # character. The command may be preceded by one additional backslash,
    # which is how it would appear (e.g.) in an InsetIndex.
    command_re = re.compile(r'(.*?)\\?(\\(?:[a-zA-Z]+|.))(.*)')

    pieces = []
    found = command_re.match(line)
    while found is not None:
        leading, cmd, tail = found.groups()
        pieces.append(leading)

        # Swallow all arguments that directly follow the command.
        arg, rest = extract_argument(tail)
        while arg is not None:
            cmd += arg
            tail = rest
            arg, rest = extract_argument(tail)

        # If we wanted to put labels into an InsetLabel, for example, then
        # this would be the place to test for cmd == "label" and to wrap
        # the content of arg appropriately.
        pieces.append("\n" + put_cmd_in_ert(cmd) + "\n")
        line = tail
        found = command_re.match(line)

    # put all remaining braces in ERT, closing ones first
    for brace in ('}', '{'):
        line = wrap_into_ert(line, brace, brace)
    if isindex:
        # active character that is not available in all font encodings
        line = wrap_into_ert(line, '|', '|')
    pieces.append(line)
    return "".join(pieces)
+
+
# List of [command, character] pairs, loaded once when this module is
# imported. It is used by latex2lyx() and lyxline2latex().
unicode_reps = read_unicodesymbols()
+
# Bug 5022:
# We should probably run latex2ert first and then deal only with the
# material that does NOT end up inside ERT. That routine could be modified
# so that it returns a list of lines; we could then skip the ERT parts and
# process only the remaining ones.
def latex2lyx(data, isindex):
    '''Takes a string, possibly multi-line, and returns the result of
    converting LaTeX constructs into LyX constructs. Returns a list of
    lines, suitable for insertion into document.body.
    The bool isindex specifies whether we are in an index macro (which
    has some specific active characters that need to be ERTed).

    Empty or None data yields [""]. Text between a pair of '$' on one line
    becomes a Formula inset; all other text is passed through latex2ert().'''

    if not data:
        return [""]
    retval = []

    # Convert LaTeX to Unicode
    # Commands of this sort need to be checked to make sure they are
    # followed by a non-alpha character, lest we replace too much.
    hardone = re.compile(r'^\\\\[a-zA-Z]+$')

    for rep in unicode_reps:
        # rep is a [command, character] pair
        if hardone.match(rep[0]):
            pos = 0
            while True:
                pos = data.find(rep[0], pos)
                if pos == -1:
                    break
                nextpos = pos + len(rep[0])
                if nextpos < len(data) and data[nextpos].isalpha():
                    # not the end of that command
                    pos = nextpos
                    continue
                data = data[:pos] + rep[1] + data[nextpos:]
                # Resume right behind the inserted character. nextpos is an
                # index into the old, longer string and would skip over an
                # occurrence that directly follows this one.
                pos += len(rep[1])
        else:
            data = data.replace(rep[0], rep[1])

    # Generic
    # \" -> ":
    data = wrap_into_ert(data, r'\"', '"')
    # \\ -> \:
    data = data.replace('\\\\', '\\')

    # Math:
    mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
    for line in data.split('\n'):
        g = line
        m = mathre.match(g)
        while m is not None:
            s = m.group(1)
            f = m.group(2).replace('\\\\', '\\')
            g = m.group(3)
            if s:
                # this is non-math!
                s = latex2ert(s, isindex)
                retval += s.split('\n')
            retval.append("\\begin_inset Formula " + f)
            retval.append("\\end_inset")
            m = mathre.match(g)
        # Handle whatever is left, which is just text
        g = latex2ert(g, isindex)
        retval += g.split('\n')
    return retval
+
+
def lyxline2latex(document, line, inert):
    """Convert one line of LyX into corresponding LaTeX, as best we can.

    document is only handed on to add_to_preamble(). inert tells whether
    the line lies inside an ERT inset: if so, only \\backslash is turned
    back into a backslash; otherwise special characters are escaped and
    unicode characters and font switches are replaced by LaTeX commands.

    Returns the converted line, or "" for lines that carry only LyX
    structure (layout and inset delimiters, \\lang, inset status, blanks).
    """
    if line.startswith("\\begin_inset Formula"):
        # keep what follows the inset header
        line = line[20:]
    elif line.startswith("\\begin_inset Quotes"):
        # For now, we do a very basic reversion. Someone who understands
        # quotes is welcome to fix it up.
        qtype = line[20:].strip()
        # qtype[0] would be the language, which is ignored
        side = qtype[1]
        dbls = qtype[2]
        mark = "'"
        if side == "l":
            mark = "`"
        if dbls == "d":
            mark = mark + mark
        line = mark
    else:
        # skip everything that is pure LyX structure
        if line.isspace():
            return ""
        structural = ("\\begin_layout", "\\end_layout", "\\begin_inset",
                      "\\end_inset", "\\lang")
        for prefix in structural:
            if line.startswith(prefix):
                return ""
        if line.strip() in ("status collapsed", "status open"):
            return ""

    # this needs to be added to the preamble because of cases like
    # \textmu, \textbackslash, etc.
    preamble = ['% added by lyx2lyx for converted entries',
                '\\@ifundefined{textmu}',
                ' {\\usepackage{textcomp}}{}']
    add_to_preamble(document, preamble)

    # a lossless reversion is not possible
    # try at least to handle some common insets and settings
    if inert:
        return line.replace(r'\backslash', '\\')

    escapes = (
        ('&', '\\&{}'),
        ('#', '\\#{}'),
        ('^', '\\^{}'),
        ('%', '\\%{}'),
        ('_', '\\_{}'),
        ('$', '\\${}'),
    )
    for plain, escaped in escapes:
        line = line.replace(plain, escaped)

    # Do the LyX text --> LaTeX conversion
    for rep in unicode_reps:
        latex_cmd = rep[0].replace('\\\\', '\\') + "{}"
        line = line.replace(rep[1], latex_cmd)
    line = line.replace(r'\backslash', r'\textbackslash{}')

    # applied strictly in this order
    lyx_to_latex = (
        (r'\series bold', r'\bfseries{}'),
        (r'\series default', r'\mdseries{}'),
        (r'\shape italic', r'\itshape{}'),
        (r'\shape smallcaps', r'\scshape{}'),
        (r'\shape slanted', r'\slshape{}'),
        (r'\shape default', r'\upshape{}'),
        (r'\emph on', r'\em{}'),
        (r'\emph default', r'\em{}'),
        (r'\noun on', r'\scshape{}'),
        (r'\noun default', r'\upshape{}'),
        (r'\bar under', r'\underbar{'),
        (r'\bar default', r'}'),
        (r'\family sans', r'\sffamily{}'),
        (r'\family default', r'\normalfont{}'),
        (r'\family typewriter', r'\ttfamily{}'),
        (r'\family roman', r'\rmfamily{}'),
        (r'\InsetSpace ', r''),
        (r'\SpecialChar ', r''),
    )
    for lyx_code, latex_code in lyx_to_latex:
        line = line.replace(lyx_code, latex_code)
    return line
+
+
def lyx2latex(document, lines):
    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    Each line is converted with lyxline2latex() and the results are
    concatenated into one string, which is returned. Lines up to the
    index that find_end_of_inset() reports for an ERT inset are converted
    with inert=True, so their content is not escaped again. document is
    only passed through to lyxline2latex().
    """
    pieces = []
    # Index of the last line belonging to the most recent ERT inset.
    # Starting at -1 rather than 0 keeps line 0 from being mistaken for
    # ERT content when no ERT inset has been seen yet.
    ert_end = -1

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so figure out
            # where the end of the inset is.
            # NOTE(review): presumably -1 comes back when no end is found,
            # in which case nothing is treated as ERT -- confirm.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        inert = ert_end >= curline
        pieces.append(lyxline2latex(document, line, inert))

    return "".join(pieces)
+
+