lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 '''
  20 This module offers several free functions to help with lyx2lyx'ing.
   21 More documentation is below, but here is a quick guide to what
  22 they do. Optional arguments are marked by brackets.
  23
  24 add_to_preamble(document, text):
  25   Here, text can be either a single line or a list of lines. It
  26   is bad practice to pass something with embedded newlines, but
  27   we will handle that properly.
  28   The routine checks to see whether the provided material is
  29   already in the preamble. If not, it adds it.
  30   Prepends a comment "% Added by lyx2lyx" to text.
  31
  32 insert_to_preamble(document, text[, index]):
  33   Here, text can be either a single line or a list of lines. It
  34   is bad practice to pass something with embedded newlines, but
  35   we will handle that properly.
  36   The routine inserts text at document.preamble[index], where by
  37   default index is 0, so the material is inserted at the beginning.
  38   Prepends a comment "% Added by lyx2lyx" to text.
  39
  40 put_cmd_in_ert(cmd):
  41   Here cmd should be a list of strings (lines), which we want to
  42   wrap in ERT. Returns a list of strings so wrapped.
  43   A call to this routine will often go something like this:
  44     i = find_token('\\begin_inset FunkyInset', ...)
  45     j = find_end_of_inset(document.body, i)
   46     content = lyx2latex(document, document.body[i:j + 1])
  47     ert = put_cmd_in_ert(content)
  48     document.body[i:j+1] = ert
  49
  50 get_ert(lines, i[, verbatim]):
  51   Here, lines is a list of lines of LyX material containing an ERT inset,
  52   whose content we want to convert to LaTeX. The ERT starts at index i.
  53   If the optional (by default: False) bool verbatim is True, the content
  54   of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
  55   for the use in verbatim insets.
  56
  57 lyx2latex(document, lines):
  58   Here, lines is a list of lines of LyX material we want to convert
  59   to LaTeX. We do the best we can and return a string containing
  60   the translated material.
  61
  62 lyx2verbatim(document, lines):
  63   Here, lines is a list of lines of LyX material we want to convert
   64   to verbatim material (used in ERT and the like). We do the best we
  65   can and return a string containing the translated material.
  66
  67 latex_length(slen):
  68   Convert lengths (in LyX form) to their LaTeX representation. Returns
  69   (bool, length), where the bool tells us if it was a percentage, and
  70   the length is the LaTeX representation.
  71
  72 convert_info_insets(document, type, func):
  73   Applies func to the argument of all info insets matching certain types
  74   type : the type to match. This can be a regular expression.
  75   func : function from string to string to apply to the "arg" field of
  76          the info insets.
  77
  78 is_document_option(document, option):
  79   Find if _option_ is a document option (\\options in the header).
  80
  81 insert_document_option(document, option):
  82   Insert _option_ as a document option.
  83
  84 remove_document_option(document, option):
  85   Remove _option_ as a document option.
  86
  87 get_language_for_line(document, i):
  88   Return the language setting for line number i.
  89 '''
  90
  91 import re
  92 from parser_tools import find_token, find_end_of_inset, get_containing_layout
  93 from unicode_symbols import unicode_reps
  94
  95 # This will accept either a list of lines or a single line.
  96 # It is bad practice to pass something with embedded newlines,
  97 # though we will handle that.
  98 def add_to_preamble(document, text):
  99     " Add text to the preamble if it is not already there. "
 100
 101     if not type(text) is list:
 102       # split on \n just in case
 103       # it'll give us the one element list we want
 104       # if there's no \n, too
 105       text = text.split('\n')
 106
 107     i = 0
 108     prelen = len(document.preamble)
 109     while True:
 110       i = find_token(document.preamble, text[0], i)
 111       if i == -1:
 112         break
 113       # we need a perfect match
 114       matched = True
 115       for line in text:
 116         if i >= prelen or line != document.preamble[i]:
 117           matched = False
 118           break
 119         i += 1
 120       if matched:
 121         return
 122
 123     document.preamble.extend(["% Added by lyx2lyx"])
 124     document.preamble.extend(text)
 125
 126
 127 # Note that text can be either a list of lines or a single line.
 128 # It should really be a list.
 129 def insert_to_preamble(document, text, index = 0):
 130     """ Insert text to the preamble at a given line"""
 131
 132     if not type(text) is list:
 133       # split on \n just in case
 134       # it'll give us the one element list we want
 135       # if there's no \n, too
 136       text = text.split('\n')
 137
 138     text.insert(0, "% Added by lyx2lyx")
 139     document.preamble[index:index] = text
 140
 141
 142 # A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
 143 # Created from the reversed list to keep the first of alternative definitions.
 144 licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
 145
 146 def put_cmd_in_ert(cmd):
 147     """
 148     Return ERT inset wrapping `cmd` as a list of strings.
 149
 150     `cmd` can be a string or list of lines. Non-ASCII characters are converted
 151     to the respective LICR macros if defined in unicodesymbols.
 152     """
 153     ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
 154     # It will be faster to work with a single string internally.
 155     if isinstance(cmd, list):
 156         cmd = u"\n".join(cmd)
 157     else:
 158         cmd = u"%s" % cmd # ensure it is an unicode instance
 159     cmd = cmd.translate(licr_table)
 160     cmd = cmd.replace("\\", "\n\\backslash\n")
 161     ret += cmd.splitlines()
 162     ret += ["\\end_layout", "", "\\end_inset"]
 163     return ret
 164
 165
 166 def get_ert(lines, i, verbatim = False):
 167     'Convert an ERT inset into LaTeX.'
 168     if not lines[i].startswith("\\begin_inset ERT"):
 169         return ""
 170     j = find_end_of_inset(lines, i)
 171     if j == -1:
 172         return ""
 173     while i < j and not lines[i].startswith("status"):
 174         i = i + 1
 175     i = i + 1
 176     ret = ""
 177     first = True
 178     while i < j:
 179         if lines[i] == "\\begin_layout Plain Layout":
 180             if first:
 181                 first = False
 182             else:
 183                 ret = ret + "\n"
 184             while i + 1 < j and lines[i+1] == "":
 185                 i = i + 1
 186         elif lines[i] == "\\end_layout":
 187             while i + 1 < j and lines[i+1] == "":
 188                 i = i + 1
 189         elif lines[i] == "\\backslash":
 190             if verbatim:
 191                 ret = ret + "\n" + lines[i] + "\n"
 192             else:
 193                 ret = ret + "\\"
 194         else:
 195             ret = ret + lines[i]
 196         i = i + 1
 197     return ret
 198
 199
 200 def lyx2latex(document, lines):
 201     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 202
 203     content = ""
 204     ert_end = 0
 205     note_end = 0
 206     hspace = ""
 207
 208     for curline in range(len(lines)):
 209       line = lines[curline]
 210       if line.startswith("\\begin_inset Note Note"):
 211           # We want to skip LyX notes, so remember where the inset ends
 212           note_end = find_end_of_inset(lines, curline + 1)
 213           continue
 214       elif note_end >= curline:
 215           # Skip LyX notes
 216           continue
 217       elif line.startswith("\\begin_inset ERT"):
 218           # We don't want to replace things inside ERT, so figure out
 219           # where the end of the inset is.
 220           ert_end = find_end_of_inset(lines, curline + 1)
 221           continue
 222       elif line.startswith("\\begin_inset Formula"):
 223           line = line[20:]
 224       elif line.startswith("\\begin_inset Quotes"):
 225           # For now, we do a very basic reversion. Someone who understands
 226           # quotes is welcome to fix it up.
 227           qtype = line[20:].strip()
 228           # lang = qtype[0]
 229           side = qtype[1]
 230           dbls = qtype[2]
 231           if side == "l":
 232               if dbls == "d":
 233                   line = "``"
 234               else:
 235                   line = "`"
 236           else:
 237               if dbls == "d":
 238                   line = "''"
 239               else:
 240                   line = "'"
 241       elif line.startswith("\\begin_inset Newline newline"):
 242           line = "\\\\ "
 243       elif line.startswith("\\noindent"):
 244           line = "\\noindent " # we need the space behind the command
 245       elif line.startswith("\\begin_inset space"):
 246           line = line[18:].strip()
 247           if line.startswith("\\hspace"):
 248               # Account for both \hspace and \hspace*
 249               hspace = line[:-2]
 250               continue
 251           elif line == "\\space{}":
 252               line = "\\ "
 253           elif line == "\\thinspace{}":
 254               line = "\\,"
 255       elif hspace != "":
 256           # The LyX length is in line[8:], after the \length keyword
 257           length = latex_length(line[8:])[1]
 258           line = hspace + "{" + length + "}"
 259           hspace = ""
 260       elif line.isspace() or \
 261             line.startswith("\\begin_layout") or \
 262             line.startswith("\\end_layout") or \
 263             line.startswith("\\begin_inset") or \
 264             line.startswith("\\end_inset") or \
 265             line.startswith("\\lang") or \
 266             line.strip() == "status collapsed" or \
 267             line.strip() == "status open":
 268           #skip all that stuff
 269           continue
 270
 271       # this needs to be added to the preamble because of cases like
 272       # \textmu, \textbackslash, etc.
 273       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 274                                  '\\@ifundefined{textmu}',
 275                                  ' {\\usepackage{textcomp}}{}'])
 276       # a lossless reversion is not possible
 277       # try at least to handle some common insets and settings
 278       if ert_end >= curline:
 279           line = line.replace(r'\backslash', '\\')
 280       else:
 281           # No need to add "{}" after single-nonletter macros
 282           line = line.replace('&', '\\&')
 283           line = line.replace('#', '\\#')
 284           line = line.replace('^', '\\textasciicircum{}')
 285           line = line.replace('%', '\\%')
 286           line = line.replace('_', '\\_')
 287           line = line.replace('$', '\\$')
 288
 289           # Do the LyX text --> LaTeX conversion
 290           for rep in unicode_reps:
 291               line = line.replace(rep[1], rep[0])
 292           line = line.replace(r'\backslash', r'\textbackslash{}')
 293           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 294           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 295           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 296           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 297           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 298           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 299           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 300           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 301           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 302       content += line
 303     return content
 304
 305
 306 def lyx2verbatim(document, lines):
 307     'Convert some LyX stuff into corresponding verbatim stuff, as best we can.'
 308
 309     content = lyx2latex(document, lines)
 310     content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content)
 311
 312     return content
 313
 314
 315 def latex_length(slen):
 316     '''
 317     Convert lengths to their LaTeX representation. Returns (bool, length),
 318     where the bool tells us if it was a percentage, and the length is the
 319     LaTeX representation.
 320     '''
 321     i = 0
 322     percent = False
 323     # the slen has the form
 324     # ValueUnit+ValueUnit-ValueUnit or
 325     # ValueUnit+-ValueUnit
 326     # the + and - (glue lengths) are optional
 327     # the + always precedes the -
 328
 329     # Convert relative lengths to LaTeX units
 330     units = {"col%": "\\columnwidth",
 331              "text%": "\\textwidth",
 332              "page%": "\\paperwidth",
 333              "line%": "\\linewidth",
 334              "theight%": "\\textheight",
 335              "pheight%": "\\paperheight",
 336              "baselineskip%": "\\baselineskip"
 337             }
 338     for unit in list(units.keys()):
 339         i = slen.find(unit)
 340         if i == -1:
 341             continue
 342         percent = True
 343         minus = slen.rfind("-", 1, i)
 344         plus = slen.rfind("+", 0, i)
 345         latex_unit = units[unit]
 346         if plus == -1 and minus == -1:
 347             value = slen[:i]
 348             value = str(float(value)/100)
 349             end = slen[i + len(unit):]
 350             slen = value + latex_unit + end
 351         if plus > minus:
 352             value = slen[plus + 1:i]
 353             value = str(float(value)/100)
 354             begin = slen[:plus + 1]
 355             end = slen[i+len(unit):]
 356             slen = begin + value + latex_unit + end
 357         if plus < minus:
 358             value = slen[minus + 1:i]
 359             value = str(float(value)/100)
 360             begin = slen[:minus + 1]
 361             slen = begin + value + latex_unit
 362
 363     # replace + and -, but only if the - is not the first character
 364     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 365     # handle the case where "+-1mm" was used, because LaTeX only understands
 366     # "plus 1mm minus 1mm"
 367     if slen.find("plus  minus"):
 368         lastvaluepos = slen.rfind(" ")
 369         lastvalue = slen[lastvaluepos:]
 370         slen = slen.replace("  ", lastvalue + " ")
 371     return (percent, slen)
 372
 373
 374 def length_in_bp(length):
 375     " Convert a length in LyX format to its value in bp units "
 376
 377     em_width = 10.0 / 72.27 # assume 10pt font size
 378     text_width = 8.27 / 1.7 # assume A4 with default margins
 379     # scale factors are taken from Length::inInch()
 380     scales = {"bp"       : 1.0,
 381               "cc"       : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
 382               "cm"       : (72.0 / 2.54),
 383               "dd"       : (72.0 / (72.27 / (0.376 * 2.845))),
 384               "em"       : (72.0 * em_width),
 385               "ex"       : (72.0 * em_width * 0.4305),
 386               "in"       : 72.0,
 387               "mm"       : (72.0 / 25.4),
 388               "mu"       : (72.0 * em_width / 18.0),
 389               "pc"       : (72.0 / (72.27 / 12.0)),
 390               "pt"       : (72.0 / (72.27)),
 391               "sp"       : (72.0 / (72.27 * 65536.0)),
 392               "text%"    : (72.0 * text_width / 100.0),
 393               "col%"     : (72.0 * text_width / 100.0), # assume 1 column
 394               "page%"    : (72.0 * text_width * 1.7 / 100.0),
 395               "line%"    : (72.0 * text_width / 100.0),
 396               "theight%" : (72.0 * text_width * 1.787 / 100.0),
 397               "pheight%" : (72.0 * text_width * 2.2 / 100.0)}
 398
 399     rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$')
 400     m = rx.match(length)
 401     if not m:
 402         document.warning("Invalid length value: " + length + ".")
 403         return 0
 404     value = m.group(1)
 405     unit = m.group(2)
 406     if not unit in scales.keys():
 407         document.warning("Unknown length unit: " + unit + ".")
 408         return value
 409     return "%g" % (float(value) * scales[unit])
 410
 411
 412 def revert_flex_inset(lines, name, LaTeXname):
 413   " Convert flex insets to TeX code "
 414   i = 0
 415   while True:
 416     i = find_token(lines, '\\begin_inset Flex ' + name, i)
 417     if i == -1:
 418       return
 419     z = find_end_of_inset(lines, i)
 420     if z == -1:
 421       document.warning("Can't find end of Flex " + name + " inset.")
 422       i += 1
 423       continue
 424     # remove the \end_inset
 425     lines[z - 2:z + 1] = put_cmd_in_ert("}")
 426     # we need to reset character layouts if necessary
 427     j = find_token(lines, '\\emph on', i, z)
 428     k = find_token(lines, '\\noun on', i, z)
 429     l = find_token(lines, '\\series', i, z)
 430     m = find_token(lines, '\\family', i, z)
 431     n = find_token(lines, '\\shape', i, z)
 432     o = find_token(lines, '\\color', i, z)
 433     p = find_token(lines, '\\size', i, z)
 434     q = find_token(lines, '\\bar under', i, z)
 435     r = find_token(lines, '\\uuline on', i, z)
 436     s = find_token(lines, '\\uwave on', i, z)
 437     t = find_token(lines, '\\strikeout on', i, z)
 438     if j != -1:
 439       lines.insert(z - 2, "\\emph default")
 440     if k != -1:
 441       lines.insert(z - 2, "\\noun default")
 442     if l != -1:
 443       lines.insert(z - 2, "\\series default")
 444     if m != -1:
 445       lines.insert(z - 2, "\\family default")
 446     if n != -1:
 447       lines.insert(z - 2, "\\shape default")
 448     if o != -1:
 449       lines.insert(z - 2, "\\color inherit")
 450     if p != -1:
 451       lines.insert(z - 2, "\\size default")
 452     if q != -1:
 453       lines.insert(z - 2, "\\bar default")
 454     if r != -1:
 455       lines.insert(z - 2, "\\uuline default")
 456     if s != -1:
 457       lines.insert(z - 2, "\\uwave default")
 458     if t != -1:
 459       lines.insert(z - 2, "\\strikeout default")
 460     lines[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 461     i += 1
 462
 463
 464 def revert_font_attrs(lines, name, LaTeXname):
 465   " Reverts font changes to TeX code "
 466   i = 0
 467   changed = False
 468   while True:
 469     i = find_token(lines, name + ' on', i)
 470     if i == -1:
 471       return changed
 472     j = find_token(lines, name + ' default', i)
 473     k = find_token(lines, name + ' on', i + 1)
 474     # if there is no default set, the style ends with the layout
 475     # assure hereby that we found the correct layout end
 476     if j != -1 and (j < k or k == -1):
 477       lines[j:j + 1] = put_cmd_in_ert("}")
 478     else:
 479       j = find_token(lines, '\\end_layout', i)
 480       lines[j:j] = put_cmd_in_ert("}")
 481     lines[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 482     changed = True
 483     i += 1
 484
 485
 486 def revert_layout_command(lines, name, LaTeXname):
 487   " Reverts a command from a layout to TeX code "
 488   i = 0
 489   while True:
 490     i = find_token(lines, '\\begin_layout ' + name, i)
 491     if i == -1:
 492       return
 493     k = -1
 494     # find the next layout
 495     j = i + 1
 496     while k == -1:
 497       j = find_token(lines, '\\begin_layout', j)
 498       l = len(lines)
 499       # if nothing was found it was the last layout of the document
 500       if j == -1:
 501         lines[l - 4:l - 4] = put_cmd_in_ert("}")
 502         k = 0
 503       # exclude plain layout because this can be TeX code or another inset
 504       elif lines[j] != '\\begin_layout Plain Layout':
 505         lines[j - 2:j - 2] = put_cmd_in_ert("}")
 506         k = 0
 507       else:
 508         j += 1
 509     lines[i] = '\\begin_layout Standard'
 510     lines[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 511     i += 1
 512
 513
 514 def hex2ratio(s):
 515   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 516   try:
 517     val = int(s, 16)
 518   except:
 519     val = 0
 520   if val != 0:
 521     val += 1
 522   return str(val / 256.0)
 523
 524
 525 def str2bool(s):
 526   "'true' goes to True, case-insensitively, and we strip whitespace."
 527   s = s.strip().lower()
 528   return s == "true"
 529
 530
 531 def convert_info_insets(document, type, func):
 532     "Convert info insets matching type using func."
 533     i = 0
 534     type_re = re.compile(r'^type\s+"(%s)"$' % type)
 535     arg_re = re.compile(r'^arg\s+"(.*)"$')
 536     while True:
 537         i = find_token(document.body, "\\begin_inset Info", i)
 538         if i == -1:
 539             return
 540         t = type_re.match(document.body[i + 1])
 541         if t:
 542             arg = arg_re.match(document.body[i + 2])
 543             if arg:
 544                 new_arg = func(arg.group(1))
 545                 document.body[i + 2] = 'arg   "%s"' % new_arg
 546         i += 3
 547
 548
 549 def insert_document_option(document, option):
 550     "Insert _option_ as a document option."
 551
 552     # Find \options in the header
 553     options_line = find_token(document.header, "\\options", 0)
 554
 555     # if the options does not exists add it after the textclass
 556     if options_line == -1:
 557         textclass_line = find_token(document.header, "\\textclass", 0)
 558         document.header.insert(textclass_line +1,
 559                                r"\options %s" % option)
 560         return
 561
 562     # add it to the end of the options
 563     document.header[options_line] += " ,%s" % option
 564
 565
 566 def remove_document_option(document, option):
 567     """ Remove _option_ as a document option.
 568
 569     It is assumed that option belongs to the \options.
 570     That can be done running is_document_option(document, option)."""
 571
 572     options_line = find_token(document.header, "\\options", 0)
 573     option_pos = document.header[options_line].find(option)
 574
 575     # Remove option from \options
 576     comma_before_pos = document.header[options_line].rfind(',', 0, option_pos)
 577     comma_after_pos  = document.header[options_line].find(',', option_pos)
 578
 579     # if there are no commas then it is the single option
 580     # and the options line should be removed since it will be empty
 581     if comma_before_pos == comma_after_pos == -1:
 582         del document.header[options_line]
 583         return
 584
 585     # last option
 586     options = document.header[options_line]
 587     if comma_after_pos == -1:
 588         document.header[options_line] = options[:comma_before_pos].rsplit()
 589         return
 590
 591     document.header[options_line] = options[comma_before_pos: comma_after_pos]
 592
 593
 594 def is_document_option(document, option):
 595     "Find if _option_ is a document option"
 596
 597     # Find \options in the header
 598     options_line = find_token(document.header, "\\options", 0)
 599
 600     # \options is not present in the header
 601     if options_line == -1:
 602         return False
 603
 604     option_pos = document.header[options_line].find(option)
 605     # option is not present in the \options
 606     if option_pos == -1:
 607         return False
 608
 609     return True
 610
 611
 612 def get_language_for_line(document, i):
 613     " Return the language for line number i"
 614     layout = get_containing_layout(document.body, i)
 615     if not layout:
 616         return document.language
 617     start_of_par = layout[3]
 618     for line in document.body[i:start_of_par:-1]:
 619         if line.startswith('\\lang '):
 620             return line[len('\\lang '):]
 621     return document.language