# This file is part of lyx2lyx
# Copyright (C) 2011 The LyX team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""
This module offers several free functions to help with lyx2lyx'ing.
More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.

add_to_preamble(document, text):
  Here, text can be either a single line or a list of lines. It
  is bad practice to pass something with embedded newlines, but
  we will handle that properly.
  The routine checks to see whether the provided material is
  already in the preamble. If not, it adds it.
  Prepends a comment "% Added by lyx2lyx" to text.

insert_to_preamble(document, text[, index]):
  Here, text can be either a single line or a list of lines. It
  is bad practice to pass something with embedded newlines, but
  we will handle that properly.
  The routine inserts text at document.preamble[index], where by
  default index is 0, so the material is inserted at the beginning.
  Prepends a comment "% Added by lyx2lyx" to text.

put_cmd_in_ert(cmd[, is_open, as_paragraph]):
  Here cmd should be a string or a list of strings (lines), which we
  want to wrap in ERT. Returns a list of strings so wrapped.
  A call to this routine will often go something like this:
    i = find_token('\\begin_inset FunkyInset', ...)
    j = find_end_of_inset(document.body, i)
    content = lyx2latex(document, document.body[i:j + 1])
    ert = put_cmd_in_ert(content)
    document.body[i:j+1] = ert

get_ert(lines, i[, verbatim]):
  Here, lines is a list of lines of LyX material containing an ERT inset,
  whose content we want to convert to LaTeX. The ERT starts at index i.
  If the optional (by default: False) bool verbatim is True, the content
  of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
  for the use in verbatim insets.

lyx2latex(document, lines):
  Here, lines is a list of lines of LyX material we want to convert
  to LaTeX. We do the best we can and return a string containing
  the translated material.

lyx2verbatim(document, lines):
  Here, lines is a list of lines of LyX material we want to convert
  to verbatim material (used in ERT and the like). We do the best we
  can and return a string containing the translated material.

latex_length(slen):
  Convert lengths (in LyX form) to their LaTeX representation. Returns
  (bool, length), where the bool tells us if it was a percentage, and
  the length is the LaTeX representation.

convert_info_insets(document, type, func):
  Applies func to the argument of all info insets matching certain types
  type : the type to match. This can be a regular expression.
  func : function from string to string to apply to the "arg" field of
         the info insets.

is_document_option(document, option):
  Find if _option_ is a document option (\\options in the header).

insert_document_option(document, option):
  Insert _option_ as a document option.

remove_document_option(document, option):
  Remove _option_ as a document option.

revert_language(document, lyxname, babelname="", polyglossianame=""):
  Reverts native language support to ERT
  If babelname or polyglossianame is empty, it is assumed
  this language package is not supported for the given language.
"""
import re

from parser_tools import (
    find_end_of_inset,
    find_token,
    get_bool_value,
    get_containing_inset,
    get_containing_layout,
    get_value,
)
from unicode_symbols import unicode_reps
# This will accept either a list of lines or a single line.
# It is bad practice to pass something with embedded newlines,
# though we will handle that.
def add_to_preamble(document, text):
    """Add text to the preamble if it is not already there.

    `text` is a list of lines or a single string; a string is split on
    newlines.  The material counts as "already there" only if all of its
    lines occur in `document.preamble` as one contiguous, exactly matching
    run.  Otherwise the comment line "% Added by lyx2lyx" followed by the
    lines of `text` is appended to the preamble.
    """
    if not isinstance(text, list):
        # split on \n just in case; without a "\n" this still gives us
        # the one-element list we want
        text = text.split("\n")

    preamble = document.preamble
    width = len(text)
    # We need a perfect match: compare every window of the right size
    # against the whole block.  This cannot stall on a line that merely
    # starts with text[0], and it also finds overlapping candidates.
    for start in range(len(preamble) - width + 1):
        if preamble[start : start + width] == text:
            return

    preamble.extend(["% Added by lyx2lyx"])
    preamble.extend(text)
# Note that text can be either a list of lines or a single line.
# It should really be a list.
def insert_to_preamble(document, text, index=0):
    """Insert text into the preamble at a given line.

    `text` is a list of lines or a single string; a string is split on
    newlines.  The comment line "% Added by lyx2lyx" followed by the lines
    of `text` is inserted at `document.preamble[index]` (default: the
    beginning).  The caller's list is left untouched.
    """
    if isinstance(text, list):
        # work on a copy so the marker line does not leak into the
        # list object owned by the caller
        new_lines = list(text)
    else:
        # split on \n just in case; without a "\n" this still gives us
        # the one-element list we want
        new_lines = text.split("\n")

    document.preamble[index:index] = ["% Added by lyx2lyx"] + new_lines
# Translation table (code point -> LICR macro) meant for str.translate().
# The replacement list is walked back to front so that, for a character
# with alternative definitions, the first one listed is the one kept.
licr_table = {ord(char): macro for macro, char in reversed(unicode_reps)}
def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
    """
    Return ERT inset wrapping `cmd` as a list of strings.

    `cmd` can be a string or list of lines. Non-ASCII characters are converted
    to the respective LICR macros if defined in unicodesymbols,
    `is_open` is a boolean setting the inset status to "open",
    `as_paragraph` wraps the ERT inset in a Standard paragraph.
    """

    status = {False: "collapsed", True: "open"}
    ert_inset = [
        "\\begin_inset ERT",
        "status %s" % status[is_open],
        "",
        "\\begin_layout Plain Layout",
        "",
        # content here ([5:5])
        "\\end_layout",
        "",
        "\\end_inset",
    ]

    paragraph = [
        "\\begin_layout Standard",
        # content here ([1:1])
        "",
        "",
        "\\end_layout",
        "",
    ]

    # Make sure cmd is a single string and make it "LyX safe":
    # characters listed in licr_table become macros, and every backslash
    # becomes a "\backslash" token on a line of its own.
    if isinstance(cmd, list):
        cmd = "\n".join(cmd)
    cmd = cmd.translate(licr_table)
    cmd = cmd.replace("\\", "\n\\backslash\n")

    ert_inset[5:5] = cmd.splitlines()
    if not as_paragraph:
        return ert_inset
    paragraph[1:1] = ert_inset
    return paragraph
def get_ert(lines, i, verbatim=False):
    """Convert an ERT inset into LaTeX.

    `lines[i]` must be the "\\begin_inset ERT" line; otherwise, or when the
    end of the inset cannot be found, an empty string is returned.
    Consecutive Plain Layout paragraphs are joined with a newline.  A
    "\\backslash" line becomes a single backslash, or, if `verbatim` is
    true, is kept as a "\\backslash" line of its own (LyX syntax).
    """
    if not lines[i].startswith("\\begin_inset ERT"):
        return ""
    j = find_end_of_inset(lines, i)
    if j == -1:
        return ""

    # skip the inset header up to and including its "status ..." line
    while i < j and not lines[i].startswith("status"):
        i += 1
    i += 1

    parts = []
    first = True
    while i < j:
        current = lines[i]
        if current == "\\begin_layout Plain Layout":
            # every paragraph but the first starts a new output line
            if first:
                first = False
            else:
                parts.append("\n")
            while i + 1 < j and lines[i + 1] == "":
                i += 1
        elif current == "\\end_layout":
            while i + 1 < j and lines[i + 1] == "":
                i += 1
        elif current == "\\backslash":
            if verbatim:
                parts.append("\n" + current + "\n")
            else:
                parts.append("\\")
        else:
            parts.append(current)
        i += 1
    return "".join(parts)
def lyx2latex(document, lines):
    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    `lines` is a list of lines of LyX material.  LyX notes and pure
    structure lines are skipped, ERT content only gets its "\\backslash"
    tokens turned back into backslashes, and everything else is escaped and
    translated.  Returns the result as one string.  As a side effect a
    guarded "\\usepackage{textcomp}" is added to the document preamble as
    soon as the first line is actually converted.
    """
    # Lines that only carry LyX structure and never produce output.
    skip_prefixes = (
        "\\begin_layout",
        "\\end_layout",
        "\\begin_inset",
        "\\end_inset",
        "\\lang",
    )
    # Needed in the preamble because of cases like \textmu,
    # \textbackslash, etc.
    textcomp_guard = [
        "% added by lyx2lyx for converted index entries",
        "\\@ifundefined{textmu}",
        " {\\usepackage{textcomp}}{}",
    ]
    # Characters that are special in LaTeX.
    # No need to add "{}" after single-nonletter macros.
    escapes = (
        ("&", "\\&"),
        ("#", "\\#"),
        ("^", "\\textasciicircum{}"),
        ("%", "\\%"),
        ("_", "\\_"),
        ("$", "\\$"),
    )
    # LyX font/markup tokens and their LaTeX counterparts; applied in
    # this order after the unicode replacements.
    markup = (
        (r"\backslash", r"\textbackslash{}"),
        (r"\series bold", r"\bfseries{}"),
        (r"\series default", r"\mdseries{}"),
        (r"\shape italic", r"\itshape{}"),
        (r"\shape smallcaps", r"\scshape{}"),
        (r"\shape slanted", r"\slshape{}"),
        (r"\shape default", r"\upshape{}"),
        (r"\emph on", r"\em{}"),
        (r"\emph default", r"\em{}"),
        (r"\noun on", r"\scshape{}"),
        (r"\noun default", r"\upshape{}"),
        (r"\bar under", r"\underbar{"),
        (r"\bar default", r"}"),
        (r"\family sans", r"\sffamily{}"),
        (r"\family default", r"\normalfont{}"),
        (r"\family typewriter", r"\ttfamily{}"),
        (r"\family roman", r"\rmfamily{}"),
        (r"\InsetSpace ", r""),
        (r"\SpecialChar ", r""),
    )

    pieces = []
    # NOTE(review): both markers start at 0, so the line at index 0 is
    # always treated as "inside a note" and skipped.
    ert_end = 0
    note_end = 0
    hspace = ""
    preamble_done = False

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset Note Note"):
            # We want to skip LyX notes, so remember where the inset ends
            note_end = find_end_of_inset(lines, curline + 1)
            continue
        elif note_end >= curline:
            # Skip LyX notes
            continue
        elif line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so figure out
            # where the end of the inset is.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        elif line.startswith("\\begin_inset Formula"):
            line = line[20:]
        elif line.startswith("\\begin_inset Quotes"):
            # For now, we do a very basic reversion. Someone who understands
            # quotes is welcome to fix it up.
            qtype = line[20:].strip()
            # qtype[0] is the language, which is ignored here
            side = qtype[1]
            dbls = qtype[2]
            if side == "l":
                line = "``" if dbls == "d" else "`"
            else:
                line = "''" if dbls == "d" else "'"
        elif line.startswith("\\begin_inset Newline newline"):
            line = "\\\\ "
        elif line.startswith("\\noindent"):
            line = "\\noindent "  # we need the space behind the command
        elif line.startswith("\\begin_inset space"):
            line = line[18:].strip()
            if line.startswith("\\hspace"):
                # Account for both \hspace and \hspace*; the length
                # itself follows on the next line.
                hspace = line[:-2]
                continue
            elif line == "\\space{}":
                line = "\\ "
            elif line == "\\thinspace{}":
                line = "\\,"
        elif hspace != "":
            # The LyX length is in line[8:], after the \length keyword
            length = latex_length(line[8:])[1]
            line = hspace + "{" + length + "}"
            hspace = ""
        elif (
            line.isspace()
            or line.startswith(skip_prefixes)
            or line.strip() in ("status collapsed", "status open")
        ):
            # skip all that stuff
            continue

        # add_to_preamble() only adds material that is missing, so one
        # call per conversion is enough
        if not preamble_done:
            add_to_preamble(document, textcomp_guard)
            preamble_done = True

        # a lossless reversion is not possible
        # try at least to handle some common insets and settings
        if ert_end >= curline:
            line = line.replace(r"\backslash", "\\")
        else:
            for char, escaped in escapes:
                line = line.replace(char, escaped)
            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
                line = line.replace(rep[1], rep[0])
            for lyx_token, latex_cmd in markup:
                line = line.replace(lyx_token, latex_cmd)
        pieces.append(line)
    return "".join(pieces)
def lyx2verbatim(document, lines):
    """Convert some LyX stuff into corresponding verbatim stuff, as best we can.

    The material is first converted with lyx2latex(); afterwards every
    backslash that does not already start the word "backslash" is replaced
    by a "\\backslash" token on a line of its own.  Returns the result as
    one string.
    """
    content = lyx2latex(document, lines)
    content = re.sub(r"\\(?!backslash)", r"\n\\backslash\n", content)

    return content
def latex_length(slen):
    """
    Convert lengths to their LaTeX representation. Returns (bool, length),
    where the bool tells us if it was a percentage, and the length is the
    LaTeX representation.
    """
    percent = False
    # the slen has the form
    # ValueUnit+ValueUnit-ValueUnit or
    # ValueUnit+-ValueUnit
    # the + and - (glue lengths) are optional
    # the + always precedes the -

    # Convert relative lengths to LaTeX units
    units = {
        "col%": "\\columnwidth",
        "text%": "\\textwidth",
        "page%": "\\paperwidth",
        "line%": "\\linewidth",
        "theight%": "\\textheight",
        "pheight%": "\\paperheight",
        "baselineskip%": "\\baselineskip",
    }
    for unit, latex_unit in units.items():
        i = slen.find(unit)
        if i == -1:
            continue
        percent = True
        # the value in front of the unit starts after the nearest glue
        # sign; a "-" at position 0 is a sign, not glue
        minus = slen.rfind("-", 1, i)
        plus = slen.rfind("+", 0, i)
        if plus == -1 and minus == -1:
            value = str(float(slen[:i]) / 100)
            end = slen[i + len(unit) :]
            slen = value + latex_unit + end
        elif plus > minus:
            value = str(float(slen[plus + 1 : i]) / 100)
            begin = slen[: plus + 1]
            end = slen[i + len(unit) :]
            slen = begin + value + latex_unit + end
        elif plus < minus:
            value = str(float(slen[minus + 1 : i]) / 100)
            begin = slen[: minus + 1]
            slen = begin + value + latex_unit

    # replace + and -, but only if the - is not the first character
    # (slicing instead of indexing keeps an empty string intact)
    slen = slen[:1] + slen[1:].replace("+", " plus ").replace("-", " minus ")
    # handle the case where "+-1mm" was used, because LaTeX only understands
    # "plus 1mm minus 1mm": the two replacements above leave a double space
    # between "plus" and "minus", which is filled with the last value
    if "plus  minus" in slen:
        lastvaluepos = slen.rfind(" ")
        lastvalue = slen[lastvaluepos:]
        slen = slen.replace("  ", lastvalue + " ")
    return (percent, slen)
def length_in_bp(length, document=None):
    """Convert a length in LyX format to its value in bp units.

    Returns the value formatted with "%g".  If `length` cannot be parsed,
    a warning is issued and 0 is returned; if the unit is unknown, a
    warning is issued and the bare value string is returned.

    Warnings go to `document.warning()` when the optional `document` is
    given, otherwise to stderr.
    """

    def warn(message):
        if document is not None:
            document.warning(message)
        else:
            import sys

            print("Warning: " + message, file=sys.stderr)

    em_width = 10.0 / 72.27  # assume 10pt font size
    text_width = 8.27 / 1.7  # assume A4 with default margins
    # scale factors are taken from Length::inInch()
    scales = {
        "bp": 1.0,
        "cc": (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
        "cm": (72.0 / 2.54),
        "dd": (72.0 / (72.27 / (0.376 * 2.845))),
        "em": (72.0 * em_width),
        "ex": (72.0 * em_width * 0.4305),
        "in": 72.0,
        "mm": (72.0 / 25.4),
        "mu": (72.0 * em_width / 18.0),
        "pc": (72.0 / (72.27 / 12.0)),
        "pt": (72.0 / (72.27)),
        "sp": (72.0 / (72.27 * 65536.0)),
        "text%": (72.0 * text_width / 100.0),
        "col%": (72.0 * text_width / 100.0),  # assume 1 column
        "page%": (72.0 * text_width * 1.7 / 100.0),
        "line%": (72.0 * text_width / 100.0),
        "theight%": (72.0 * text_width * 1.787 / 100.0),
        "pheight%": (72.0 * text_width * 2.2 / 100.0),
    }

    # <value><unit>, optionally surrounded by whitespace
    rx = re.compile(r"^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$")
    m = rx.match(length)
    if not m:
        warn("Invalid length value: " + length + ".")
        return 0
    value = m.group(1)
    unit = m.group(2)
    if unit not in scales:
        warn("Unknown length unit: " + unit + ".")
        return value
    return "%g" % (float(value) * scales[unit])
def revert_flex_inset(lines, name, LaTeXname, document=None):
    """Convert flex insets to TeX code.

    Every "\\begin_inset Flex <name>" inset in `lines` is replaced, in
    place, by ERT holding `LaTeXname + "{"` at its start and "}" at its
    end.  Character attributes switched on inside the inset are reset
    before the closing brace.

    If the end of an inset cannot be found, a warning is issued (through
    `document.warning()` when the optional `document` is given, otherwise
    on stderr) and that inset is left alone.
    """
    # (token searched inside the inset, line that resets the attribute)
    attribute_resets = (
        ("\\emph on", "\\emph default"),
        ("\\noun on", "\\noun default"),
        ("\\series", "\\series default"),
        ("\\family", "\\family default"),
        ("\\shape", "\\shape default"),
        ("\\color", "\\color inherit"),
        ("\\size", "\\size default"),
        ("\\bar under", "\\bar default"),
        ("\\uuline on", "\\uuline default"),
        ("\\uwave on", "\\uwave default"),
        ("\\strikeout on", "\\strikeout default"),
    )

    i = 0
    while True:
        i = find_token(lines, "\\begin_inset Flex " + name, i)
        if i == -1:
            return
        z = find_end_of_inset(lines, i)
        if z == -1:
            message = "Can't find end of Flex " + name + " inset."
            if document is not None:
                document.warning(message)
            else:
                import sys

                print("Warning: " + message, file=sys.stderr)
            i += 1
            continue
        # remove the \end_inset
        lines[z - 2 : z + 1] = put_cmd_in_ert("}")
        # we need to reset character layouts if necessary; do all the
        # lookups first, because the insertions below shift the lines
        needed = [
            reset for token, reset in attribute_resets if find_token(lines, token, i, z) != -1
        ]
        for reset in needed:
            lines.insert(z - 2, reset)
        lines[i : i + 4] = put_cmd_in_ert(LaTeXname + "{")
        i += 1
def revert_font_attrs(lines, name, LaTeXname):
    """Revert font changes to TeX code.

    Each "<name> on" line in `lines` is replaced by ERT holding
    `LaTeXname + "{"`; the matching "<name> default" line (or, if there is
    none before the next "<name> on", the end of the layout) gets ERT
    holding "}".  Afterwards every remaining line starting with `name` is
    deleted.  `lines` is modified in place.

    Returns True if at least one font change was converted.
    """
    i = 0
    changed = False
    while True:
        i = find_token(lines, name + " on", i)
        if i == -1:
            break
        j = find_token(lines, name + " default", i)
        k = find_token(lines, name + " on", i + 1)
        # if there is no default set, the style ends with the layout
        # assure hereby that we found the correct layout end
        if j != -1 and (j < k or k == -1):
            lines[j : j + 1] = put_cmd_in_ert("}")
        else:
            j = find_token(lines, "\\end_layout", i)
            lines[j:j] = put_cmd_in_ert("}")
        lines[i : i + 1] = put_cmd_in_ert(LaTeXname + "{")
        changed = True
        i += 1

    # now delete all remaining lines that manipulate this attribute
    i = 0
    while True:
        i = find_token(lines, name, i)
        if i == -1:
            break
        del lines[i]

    return changed
def revert_layout_command(lines, name, LaTeXname):
    """Revert a command from a layout to TeX code.

    Every "\\begin_layout <name>" in `lines` is turned into a Standard
    layout whose content starts with ERT holding `LaTeXname + "{"`; ERT
    holding "}" is inserted shortly before the next layout that is not a
    Plain Layout, or near the end of `lines` if there is no such layout.
    `lines` is modified in place.
    """
    i = 0
    while True:
        i = find_token(lines, "\\begin_layout " + name, i)
        if i == -1:
            return
        # find the next layout
        j = i + 1
        while True:
            j = find_token(lines, "\\begin_layout", j)
            if j == -1:
                # if nothing was found it was the last layout of the document
                total = len(lines)
                lines[total - 4 : total - 4] = put_cmd_in_ert("}")
                break
            # exclude plain layout because this can be TeX code or another inset
            if lines[j] != "\\begin_layout Plain Layout":
                lines[j - 2 : j - 2] = put_cmd_in_ert("}")
                break
            j += 1
        lines[i] = "\\begin_layout Standard"
        lines[i + 1 : i + 1] = put_cmd_in_ert(LaTeXname + "{")
        i += 1
def hex2ratio(s):
    """Convert a hexadecimal string to a ratio, returned as a string.

    `s` is parsed as a base-16 integer; anything that cannot be parsed
    counts as 0.  A non-zero value is incremented by one before it is
    divided by 256.0, so that "ff" maps to "1.0" while "00" stays "0.0".
    """
    try:
        val = int(s, 16)
    except (TypeError, ValueError):
        val = 0
    if val != 0:
        val += 1
    return str(val / 256.0)
def str2bool(s):
    "'true' goes to True, case-insensitively, and we strip whitespace."
    s = s.strip().lower()
    return s == "true"
def convert_info_insets(document, type, func):
    """Convert info insets matching type using func.

    For every "\\begin_inset Info" in `document.body` whose next line is
    `type "<type>"` (`type` may be a regular expression), the quoted value
    on the following `arg "..."` line is replaced by `func(value)`.
    """
    type_re = re.compile(r'^type\s+"(%s)"$' % type)
    arg_re = re.compile(r'^arg\s+"(.*)"$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        if type_re.match(document.body[i + 1]):
            arg = arg_re.match(document.body[i + 2])
            if arg:
                new_arg = func(arg.group(1))
                document.body[i + 2] = 'arg "%s"' % new_arg
        # continue after the inset header, type and arg lines
        i += 3
def insert_document_option(document, option):
    """Insert _option_ as a document option.

    If the header has no \\options line yet, one is created right after
    the \\textclass line; otherwise `option` is appended to the existing
    line unless it is already listed there.
    """

    # Find \options in the header
    i = find_token(document.header, "\\options", 0)
    # if the options line does not exist, add it after the textclass
    if i == -1:
        i = find_token(document.header, "\\textclass", 0) + 1
        document.header.insert(i, r"\options %s" % option)
        return
    # otherwise append to options
    if not is_document_option(document, option):
        document.header[i] += ",%s" % option
def remove_document_option(document, option):
    """Remove _option_ as a document option.

    The \\options line is rewritten without `option`, or deleted if no
    option is left.  A header without an \\options line is left unchanged.
    """

    i = find_token(document.header, "\\options")
    if i == -1:
        # Nothing to remove.  Going on with index -1 would overwrite the
        # last header line.
        return
    options = get_value(document.header, "\\options", i)
    options = [op.strip() for op in options.split(",")]

    # Remove `option` from \options
    options = [op for op in options if op != option]

    if options:
        document.header[i] = "\\options " + ",".join(options)
    else:
        del document.header[i]
def is_document_option(document, option):
    """Tell whether _option_ is listed in the header's \\options value.

    The value is split at commas and each entry is compared to `option`
    after stripping surrounding whitespace.
    """
    raw_options = get_value(document.header, "\\options")
    return any(entry.strip() == option for entry in raw_options.split(","))
# Inset names looked up by revert_language() for "Plain Layout" content:
# inside these insets it wraps the text in a command rather than an
# environment.
singlepar_insets = [
    "Argument",
    "Caption Above",
    "Caption Below",
    "Caption Bicaption",
    "Caption Centered",
    "Caption FigCaption",
    "Caption Standard",
    "Caption Table",
    "Flex Chemistry",
    "Flex Fixme_Note",
    "Flex Latin",
    "Flex ListOfSlides",
    "Flex Missing_Figure",
    "Flex PDF-Annotation",
    "Flex PDF-Comment-Setup",
    "Flex Reflectbox",
    "Flex S/R expression",
    "Flex Sweave Input File",
    "Flex Sweave Options",
    "Flex Thanks_Reference",
    "Flex URL",
    "Foot InTitle",
    "IPADeco",
    "Index",
    "Info",
    "Phantom",
    "Script",
]
def revert_language(document, lyxname, babelname="", polyglossianame=""):
    """Revert native language support to ERT.

    `lyxname` is the LyX name of the language, `babelname` and
    `polyglossianame` its names for the two language packages.  If
    `babelname` or `polyglossianame` is empty, it is assumed this language
    package is not supported for the given language.

    Text in that language is wrapped in ERT with the matching LaTeX
    command or environment.  If it is the document language, the LyX
    document language becomes English and the original language is
    selected from LaTeX instead.  Header, preamble and body of `document`
    are modified in place.
    """

    # Does the document use polyglossia?
    use_polyglossia = False
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        i = find_token(document.header, "\\language_package")
        if i == -1:
            document.warning("Malformed document! Missing \\language_package")
        else:
            pack = get_value(document.header, "\\language_package", i)
            if pack in ("default", "auto"):
                use_polyglossia = True

    # Do we use this language with polyglossia?
    with_polyglossia = use_polyglossia and polyglossianame != ""
    # Do we use this language with babel?
    with_babel = not with_polyglossia and babelname != ""

    # Are we dealing with a primary or secondary language?
    primary = document.language == lyxname
    secondary = False

    # Main language first
    orig_doc_language = document.language
    if primary:
        # Change LyX document language to English (we will tell LaTeX
        # to use the original language at the end of this function):
        document.language = "english"
        i = find_token(document.header, "\\language %s" % lyxname, 0)
        if i != -1:
            document.header[i] = "\\language english"

    # Now look for occurrences in the body
    i = 0
    while True:
        i = find_token(document.body, "\\lang", i + 1)
        if i == -1:
            break
        if document.body[i].startswith("\\lang %s" % lyxname):
            secondary = True
            texname = polyglossianame if with_polyglossia else babelname
        elif primary and document.body[i].startswith("\\lang english"):
            # Since we switched the main language manually, English parts need to be marked
            texname = "english"
        else:
            continue

        parent = get_containing_layout(document.body, i)
        i_e = parent[2]  # end line no,
        # print(i, texname, parent, document.body[i+1], file=sys.stderr)

        # Move leading space to the previous line:
        if document.body[i + 1].startswith(" "):
            document.body[i + 1] = document.body[i + 1][1:]
            document.body.insert(i, " ")
            continue

        # TODO: handle nesting issues with font attributes, e.g.
        # \begin_layout Standard
        #
        # \emph on
        # \lang macedonian
        # Македонски јазик
        # \emph default
        # — јужнословенски јазик, дел од групата на словенски јазици од јазичното
        # семејство на индоевропски јазици.
        # Македонскиот е службен и национален јазик во Македонија.
        # \end_layout

        # Ensure correct handling of list labels
        if parent[0] in ["Labeling", "Description"] and " " not in "\n".join(
            document.body[parent[3] : i]
        ):
            # line `i+1` is first line of a list item,
            # part before a space character is the label
            # TODO: insets or language change before first space character
            labelline = document.body[i + 1].split(" ", 1)
            if len(labelline) > 1:
                # Insert a space in the (original) document language
                # between label and remainder.
                # print(" Label:", labelline, file=sys.stderr)
                lines = [
                    labelline[0],
                    "\\lang %s" % orig_doc_language,
                    " ",
                    "\\lang %s" % ("english" if primary else lyxname),
                    labelline[1],
                ]
                document.body[i + 1 : i + 2] = lines
                # one line was replaced by five
                i_e += 4

        # Find out where to end the language change.
        langswitch = i
        while True:
            langswitch = find_token(document.body, "\\lang", langswitch + 1, i_e)
            if langswitch == -1:
                break
            # print(" ", langswitch, document.body[langswitch], file=sys.stderr)
            # skip insets
            i_a = parent[3]  # paragraph start line
            container = get_containing_inset(document.body[i_a:i_e], langswitch - i_a)
            if (
                container
                and container[1] < langswitch - i_a
                and container[2] > langswitch - i_a
            ):
                # print(" inset", container, file=sys.stderr)
                continue
            i_e = langswitch
            break

        # use function or environment?
        singlepar = i_e - i < 3
        if not singlepar and parent[0] == "Plain Layout":
            # environment not allowed in some insets
            container = get_containing_inset(document.body, i)
            singlepar = container[0] in singlepar_insets

        # Delete empty language switches:
        if not "".join(document.body[i + 1 : i_e]):
            del document.body[i:i_e]
            i -= 1
            continue

        # Both commands stay None if neither language package supports
        # the language; then no ERT at all is inserted (in particular no
        # lone closing brace).
        begin_cmd = end_cmd = None
        if singlepar:
            if with_polyglossia:
                begin_cmd = "\\text%s{" % texname
                end_cmd = "}"
            elif with_babel:
                begin_cmd = "\\foreignlanguage{%s}{" % texname
                end_cmd = "}"
        else:
            if with_polyglossia:
                begin_cmd = "\\begin{%s}" % texname
                end_cmd = "\\end{%s}" % texname
            elif with_babel:
                begin_cmd = "\\begin{otherlanguage}{%s}" % texname
                end_cmd = "\\end{otherlanguage}"

        if begin_cmd is not None and (not primary or texname == "english"):
            # insert the end first, so index `i` stays valid
            document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
            document.body[i + 1 : i + 1] = put_cmd_in_ert(begin_cmd)
        del document.body[i]

    if not (primary or secondary):
        return

    # Make the language known to Babel/Polyglossia and ensure the correct
    # document language:
    doc_lang_switch = ""
    if with_babel:
        # add as global option
        insert_document_option(document, babelname)
        # Since user options are appended to the document options,
        # Babel will treat `babelname` as primary language.
        if not primary:
            doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
    if with_polyglossia:
        # Define language in the user preamble
        # (don't use \AtBeginDocument, this fails with some languages).
        add_to_preamble(
            document,
            ["\\usepackage{polyglossia}", "\\setotherlanguage{%s}" % polyglossianame],
        )
        if primary:
            # Changing the main language must be done in the document body.
            doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame

    # Reset LaTeX main language if required and not already done
    if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
        document.body[2:2] = put_cmd_in_ert(doc_lang_switch, is_open=True, as_paragraph=True)