lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 '''
  20 This module offers several free functions to help with lyx2lyx'ing.
   21 More documentation is below, but here is a quick guide to what
  22 they do. Optional arguments are marked by brackets.
  23
  24 add_to_preamble(document, text):
  25   Here, text can be either a single line or a list of lines. It
  26   is bad practice to pass something with embedded newlines, but
  27   we will handle that properly.
  28   The routine checks to see whether the provided material is
  29   already in the preamble. If not, it adds it.
  30   Prepends a comment "% Added by lyx2lyx" to text.
  31
  32 insert_to_preamble(document, text[, index]):
  33   Here, text can be either a single line or a list of lines. It
  34   is bad practice to pass something with embedded newlines, but
  35   we will handle that properly.
  36   The routine inserts text at document.preamble[index], where by
  37   default index is 0, so the material is inserted at the beginning.
  38   Prepends a comment "% Added by lyx2lyx" to text.
  39
  40 put_cmd_in_ert(cmd):
  41   Here cmd should be a list of strings (lines), which we want to
  42   wrap in ERT. Returns a list of strings so wrapped.
  43   A call to this routine will often go something like this:
  44     i = find_token('\\begin_inset FunkyInset', ...)
  45     j = find_end_of_inset(document.body, i)
   46     content = lyx2latex(document, document.body[i:j + 1])
  47     ert = put_cmd_in_ert(content)
  48     document.body[i:j+1] = ert
  49
  50 get_ert(lines, i[, verbatim]):
  51   Here, lines is a list of lines of LyX material containing an ERT inset,
  52   whose content we want to convert to LaTeX. The ERT starts at index i.
  53   If the optional (by default: False) bool verbatim is True, the content
  54   of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
  55   for the use in verbatim insets.
  56
  57 lyx2latex(document, lines):
  58   Here, lines is a list of lines of LyX material we want to convert
  59   to LaTeX. We do the best we can and return a string containing
  60   the translated material.
  61
  62 lyx2verbatim(document, lines):
  63   Here, lines is a list of lines of LyX material we want to convert
   64   to verbatim material (used in ERT and the like). We do the best we
  65   can and return a string containing the translated material.
  66
  67 latex_length(slen):
  68   Convert lengths (in LyX form) to their LaTeX representation. Returns
  69   (bool, length), where the bool tells us if it was a percentage, and
  70   the length is the LaTeX representation.
  71
  72 convert_info_insets(document, type, func):
  73   Applies func to the argument of all info insets matching certain types
  74   type : the type to match. This can be a regular expression.
  75   func : function from string to string to apply to the "arg" field of
  76          the info insets.
  77
  78 is_document_option(document, option):
  79   Find if _option_ is a document option (\\options in the header).
  80
  81 insert_document_option(document, option):
  82   Insert _option_ as a document option.
  83
  84 remove_document_option(document, option):
  85   Remove _option_ as a document option.
  86 '''
  87
  88 import re
  89 from parser_tools import find_token, find_end_of_inset, get_containing_layout
  90 from unicode_symbols import unicode_reps
  91
  92 # This will accept either a list of lines or a single line.
  93 # It is bad practice to pass something with embedded newlines,
  94 # though we will handle that.
  95 def add_to_preamble(document, text):
  96     " Add text to the preamble if it is not already there. "
  97
  98     if not type(text) is list:
  99       # split on \n just in case
 100       # it'll give us the one element list we want
 101       # if there's no \n, too
 102       text = text.split('\n')
 103
 104     i = 0
 105     prelen = len(document.preamble)
 106     while True:
 107       i = find_token(document.preamble, text[0], i)
 108       if i == -1:
 109         break
 110       # we need a perfect match
 111       matched = True
 112       for line in text:
 113         if i >= prelen or line != document.preamble[i]:
 114           matched = False
 115           break
 116         i += 1
 117       if matched:
 118         return
 119
 120     document.preamble.extend(["% Added by lyx2lyx"])
 121     document.preamble.extend(text)
 122
 123
 124 # Note that text can be either a list of lines or a single line.
 125 # It should really be a list.
 126 def insert_to_preamble(document, text, index = 0):
 127     """ Insert text to the preamble at a given line"""
 128
 129     if not type(text) is list:
 130       # split on \n just in case
 131       # it'll give us the one element list we want
 132       # if there's no \n, too
 133       text = text.split('\n')
 134
 135     text.insert(0, "% Added by lyx2lyx")
 136     document.preamble[index:index] = text
 137
 138
 139 # A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
 140 # Created from the reversed list to keep the first of alternative definitions.
 141 licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
 142
 143 def put_cmd_in_ert(cmd):
 144     """
 145     Return ERT inset wrapping `cmd` as a list of strings.
 146
 147     `cmd` can be a string or list of lines. Non-ASCII characters are converted
 148     to the respective LICR macros if defined in unicodesymbols.
 149     """
 150     ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
 151     # It will be faster to work with a single string internally.
 152     if isinstance(cmd, list):
 153         cmd = u"\n".join(cmd)
 154     else:
 155         cmd = u"%s" % cmd # ensure it is an unicode instance
 156     cmd = cmd.translate(licr_table)
 157     cmd = cmd.replace("\\", "\n\\backslash\n")
 158     ret += cmd.splitlines()
 159     ret += ["\\end_layout", "", "\\end_inset"]
 160     return ret
 161
 162
 163 def get_ert(lines, i, verbatim = False):
 164     'Convert an ERT inset into LaTeX.'
 165     if not lines[i].startswith("\\begin_inset ERT"):
 166         return ""
 167     j = find_end_of_inset(lines, i)
 168     if j == -1:
 169         return ""
 170     while i < j and not lines[i].startswith("status"):
 171         i = i + 1
 172     i = i + 1
 173     ret = ""
 174     first = True
 175     while i < j:
 176         if lines[i] == "\\begin_layout Plain Layout":
 177             if first:
 178                 first = False
 179             else:
 180                 ret = ret + "\n"
 181             while i + 1 < j and lines[i+1] == "":
 182                 i = i + 1
 183         elif lines[i] == "\\end_layout":
 184             while i + 1 < j and lines[i+1] == "":
 185                 i = i + 1
 186         elif lines[i] == "\\backslash":
 187             if verbatim:
 188                 ret = ret + "\n" + lines[i] + "\n"
 189             else:
 190                 ret = ret + "\\"
 191         else:
 192             ret = ret + lines[i]
 193         i = i + 1
 194     return ret
 195
 196
 197 def lyx2latex(document, lines):
 198     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 199
 200     content = ""
 201     ert_end = 0
 202     note_end = 0
 203     hspace = ""
 204
 205     for curline in range(len(lines)):
 206       line = lines[curline]
 207       if line.startswith("\\begin_inset Note Note"):
 208           # We want to skip LyX notes, so remember where the inset ends
 209           note_end = find_end_of_inset(lines, curline + 1)
 210           continue
 211       elif note_end >= curline:
 212           # Skip LyX notes
 213           continue
 214       elif line.startswith("\\begin_inset ERT"):
 215           # We don't want to replace things inside ERT, so figure out
 216           # where the end of the inset is.
 217           ert_end = find_end_of_inset(lines, curline + 1)
 218           continue
 219       elif line.startswith("\\begin_inset Formula"):
 220           line = line[20:]
 221       elif line.startswith("\\begin_inset Quotes"):
 222           # For now, we do a very basic reversion. Someone who understands
 223           # quotes is welcome to fix it up.
 224           qtype = line[20:].strip()
 225           # lang = qtype[0]
 226           side = qtype[1]
 227           dbls = qtype[2]
 228           if side == "l":
 229               if dbls == "d":
 230                   line = "``"
 231               else:
 232                   line = "`"
 233           else:
 234               if dbls == "d":
 235                   line = "''"
 236               else:
 237                   line = "'"
 238       elif line.startswith("\\begin_inset Newline newline"):
 239           line = "\\\\ "
 240       elif line.startswith("\\noindent"):
 241           line = "\\noindent " # we need the space behind the command
 242       elif line.startswith("\\begin_inset space"):
 243           line = line[18:].strip()
 244           if line.startswith("\\hspace"):
 245               # Account for both \hspace and \hspace*
 246               hspace = line[:-2]
 247               continue
 248           elif line == "\\space{}":
 249               line = "\\ "
 250           elif line == "\\thinspace{}":
 251               line = "\\,"
 252       elif hspace != "":
 253           # The LyX length is in line[8:], after the \length keyword
 254           length = latex_length(line[8:])[1]
 255           line = hspace + "{" + length + "}"
 256           hspace = ""
 257       elif line.isspace() or \
 258             line.startswith("\\begin_layout") or \
 259             line.startswith("\\end_layout") or \
 260             line.startswith("\\begin_inset") or \
 261             line.startswith("\\end_inset") or \
 262             line.startswith("\\lang") or \
 263             line.strip() == "status collapsed" or \
 264             line.strip() == "status open":
 265           #skip all that stuff
 266           continue
 267
 268       # this needs to be added to the preamble because of cases like
 269       # \textmu, \textbackslash, etc.
 270       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 271                                  '\\@ifundefined{textmu}',
 272                                  ' {\\usepackage{textcomp}}{}'])
 273       # a lossless reversion is not possible
 274       # try at least to handle some common insets and settings
 275       if ert_end >= curline:
 276           line = line.replace(r'\backslash', '\\')
 277       else:
 278           # No need to add "{}" after single-nonletter macros
 279           line = line.replace('&', '\\&')
 280           line = line.replace('#', '\\#')
 281           line = line.replace('^', '\\textasciicircum{}')
 282           line = line.replace('%', '\\%')
 283           line = line.replace('_', '\\_')
 284           line = line.replace('$', '\\$')
 285
 286           # Do the LyX text --> LaTeX conversion
 287           for rep in unicode_reps:
 288               line = line.replace(rep[1], rep[0])
 289           line = line.replace(r'\backslash', r'\textbackslash{}')
 290           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 291           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 292           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 293           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 294           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 295           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 296           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 297           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 298           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 299       content += line
 300     return content
 301
 302
 303 def lyx2verbatim(document, lines):
 304     'Convert some LyX stuff into corresponding verbatim stuff, as best we can.'
 305
 306     content = lyx2latex(document, lines)
 307     content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content)
 308
 309     return content
 310
 311
 312 def latex_length(slen):
 313     '''
 314     Convert lengths to their LaTeX representation. Returns (bool, length),
 315     where the bool tells us if it was a percentage, and the length is the
 316     LaTeX representation.
 317     '''
 318     i = 0
 319     percent = False
 320     # the slen has the form
 321     # ValueUnit+ValueUnit-ValueUnit or
 322     # ValueUnit+-ValueUnit
 323     # the + and - (glue lengths) are optional
 324     # the + always precedes the -
 325
 326     # Convert relative lengths to LaTeX units
 327     units = {"col%": "\\columnwidth",
 328              "text%": "\\textwidth",
 329              "page%": "\\paperwidth",
 330              "line%": "\\linewidth",
 331              "theight%": "\\textheight",
 332              "pheight%": "\\paperheight",
 333              "baselineskip%": "\\baselineskip"
 334             }
 335     for unit in list(units.keys()):
 336         i = slen.find(unit)
 337         if i == -1:
 338             continue
 339         percent = True
 340         minus = slen.rfind("-", 1, i)
 341         plus = slen.rfind("+", 0, i)
 342         latex_unit = units[unit]
 343         if plus == -1 and minus == -1:
 344             value = slen[:i]
 345             value = str(float(value)/100)
 346             end = slen[i + len(unit):]
 347             slen = value + latex_unit + end
 348         if plus > minus:
 349             value = slen[plus + 1:i]
 350             value = str(float(value)/100)
 351             begin = slen[:plus + 1]
 352             end = slen[i+len(unit):]
 353             slen = begin + value + latex_unit + end
 354         if plus < minus:
 355             value = slen[minus + 1:i]
 356             value = str(float(value)/100)
 357             begin = slen[:minus + 1]
 358             slen = begin + value + latex_unit
 359
 360     # replace + and -, but only if the - is not the first character
 361     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 362     # handle the case where "+-1mm" was used, because LaTeX only understands
 363     # "plus 1mm minus 1mm"
 364     if slen.find("plus  minus"):
 365         lastvaluepos = slen.rfind(" ")
 366         lastvalue = slen[lastvaluepos:]
 367         slen = slen.replace("  ", lastvalue + " ")
 368     return (percent, slen)
 369
 370
 371 def length_in_bp(length):
 372     " Convert a length in LyX format to its value in bp units "
 373
 374     em_width = 10.0 / 72.27 # assume 10pt font size
 375     text_width = 8.27 / 1.7 # assume A4 with default margins
 376     # scale factors are taken from Length::inInch()
 377     scales = {"bp"       : 1.0,
 378               "cc"       : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
 379               "cm"       : (72.0 / 2.54),
 380               "dd"       : (72.0 / (72.27 / (0.376 * 2.845))),
 381               "em"       : (72.0 * em_width),
 382               "ex"       : (72.0 * em_width * 0.4305),
 383               "in"       : 72.0,
 384               "mm"       : (72.0 / 25.4),
 385               "mu"       : (72.0 * em_width / 18.0),
 386               "pc"       : (72.0 / (72.27 / 12.0)),
 387               "pt"       : (72.0 / (72.27)),
 388               "sp"       : (72.0 / (72.27 * 65536.0)),
 389               "text%"    : (72.0 * text_width / 100.0),
 390               "col%"     : (72.0 * text_width / 100.0), # assume 1 column
 391               "page%"    : (72.0 * text_width * 1.7 / 100.0),
 392               "line%"    : (72.0 * text_width / 100.0),
 393               "theight%" : (72.0 * text_width * 1.787 / 100.0),
 394               "pheight%" : (72.0 * text_width * 2.2 / 100.0)}
 395
 396     rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$')
 397     m = rx.match(length)
 398     if not m:
 399         document.warning("Invalid length value: " + length + ".")
 400         return 0
 401     value = m.group(1)
 402     unit = m.group(2)
 403     if not unit in scales.keys():
 404         document.warning("Unknown length unit: " + unit + ".")
 405         return value
 406     return "%g" % (float(value) * scales[unit])
 407
 408
 409 def revert_flex_inset(lines, name, LaTeXname):
 410   " Convert flex insets to TeX code "
 411   i = 0
 412   while True:
 413     i = find_token(lines, '\\begin_inset Flex ' + name, i)
 414     if i == -1:
 415       return
 416     z = find_end_of_inset(lines, i)
 417     if z == -1:
 418       document.warning("Can't find end of Flex " + name + " inset.")
 419       i += 1
 420       continue
 421     # remove the \end_inset
 422     lines[z - 2:z + 1] = put_cmd_in_ert("}")
 423     # we need to reset character layouts if necessary
 424     j = find_token(lines, '\\emph on', i, z)
 425     k = find_token(lines, '\\noun on', i, z)
 426     l = find_token(lines, '\\series', i, z)
 427     m = find_token(lines, '\\family', i, z)
 428     n = find_token(lines, '\\shape', i, z)
 429     o = find_token(lines, '\\color', i, z)
 430     p = find_token(lines, '\\size', i, z)
 431     q = find_token(lines, '\\bar under', i, z)
 432     r = find_token(lines, '\\uuline on', i, z)
 433     s = find_token(lines, '\\uwave on', i, z)
 434     t = find_token(lines, '\\strikeout on', i, z)
 435     if j != -1:
 436       lines.insert(z - 2, "\\emph default")
 437     if k != -1:
 438       lines.insert(z - 2, "\\noun default")
 439     if l != -1:
 440       lines.insert(z - 2, "\\series default")
 441     if m != -1:
 442       lines.insert(z - 2, "\\family default")
 443     if n != -1:
 444       lines.insert(z - 2, "\\shape default")
 445     if o != -1:
 446       lines.insert(z - 2, "\\color inherit")
 447     if p != -1:
 448       lines.insert(z - 2, "\\size default")
 449     if q != -1:
 450       lines.insert(z - 2, "\\bar default")
 451     if r != -1:
 452       lines.insert(z - 2, "\\uuline default")
 453     if s != -1:
 454       lines.insert(z - 2, "\\uwave default")
 455     if t != -1:
 456       lines.insert(z - 2, "\\strikeout default")
 457     lines[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 458     i += 1
 459
 460
 461 def revert_font_attrs(lines, name, LaTeXname):
 462   " Reverts font changes to TeX code "
 463   i = 0
 464   changed = False
 465   while True:
 466     i = find_token(lines, name + ' on', i)
 467     if i == -1:
 468       break
 469     j = find_token(lines, name + ' default', i)
 470     k = find_token(lines, name + ' on', i + 1)
 471     # if there is no default set, the style ends with the layout
 472     # assure hereby that we found the correct layout end
 473     if j != -1 and (j < k or k == -1):
 474       lines[j:j + 1] = put_cmd_in_ert("}")
 475     else:
 476       j = find_token(lines, '\\end_layout', i)
 477       lines[j:j] = put_cmd_in_ert("}")
 478     lines[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 479     changed = True
 480     i += 1
 481
 482   # now delete all remaining lines that manipulate this attribute
 483   i = 0
 484   while True:
 485     i = find_token(lines, name, i)
 486     if i == -1:
 487       break
 488     del lines[i]
 489
 490   return changed
 491
 492
 493 def revert_layout_command(lines, name, LaTeXname):
 494   " Reverts a command from a layout to TeX code "
 495   i = 0
 496   while True:
 497     i = find_token(lines, '\\begin_layout ' + name, i)
 498     if i == -1:
 499       return
 500     k = -1
 501     # find the next layout
 502     j = i + 1
 503     while k == -1:
 504       j = find_token(lines, '\\begin_layout', j)
 505       l = len(lines)
 506       # if nothing was found it was the last layout of the document
 507       if j == -1:
 508         lines[l - 4:l - 4] = put_cmd_in_ert("}")
 509         k = 0
 510       # exclude plain layout because this can be TeX code or another inset
 511       elif lines[j] != '\\begin_layout Plain Layout':
 512         lines[j - 2:j - 2] = put_cmd_in_ert("}")
 513         k = 0
 514       else:
 515         j += 1
 516     lines[i] = '\\begin_layout Standard'
 517     lines[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 518     i += 1
 519
 520
 521 def hex2ratio(s):
 522   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 523   try:
 524     val = int(s, 16)
 525   except:
 526     val = 0
 527   if val != 0:
 528     val += 1
 529   return str(val / 256.0)
 530
 531
 532 def str2bool(s):
 533   "'true' goes to True, case-insensitively, and we strip whitespace."
 534   s = s.strip().lower()
 535   return s == "true"
 536
 537
 538 def convert_info_insets(document, type, func):
 539     "Convert info insets matching type using func."
 540     i = 0
 541     type_re = re.compile(r'^type\s+"(%s)"$' % type)
 542     arg_re = re.compile(r'^arg\s+"(.*)"$')
 543     while True:
 544         i = find_token(document.body, "\\begin_inset Info", i)
 545         if i == -1:
 546             return
 547         t = type_re.match(document.body[i + 1])
 548         if t:
 549             arg = arg_re.match(document.body[i + 2])
 550             if arg:
 551                 new_arg = func(arg.group(1))
 552                 document.body[i + 2] = 'arg   "%s"' % new_arg
 553         i += 3
 554
 555
 556 def insert_document_option(document, option):
 557     "Insert _option_ as a document option."
 558
 559     # Find \options in the header
 560     options_line = find_token(document.header, "\\options", 0)
 561
 562     # if the options does not exists add it after the textclass
 563     if options_line == -1:
 564         textclass_line = find_token(document.header, "\\textclass", 0)
 565         document.header.insert(textclass_line +1,
 566                                r"\options %s" % option)
 567         return
 568
 569     # add it to the end of the options
 570     document.header[options_line] += " ,%s" % option
 571
 572
 573 def remove_document_option(document, option):
 574     """ Remove _option_ as a document option.
 575
 576     It is assumed that option belongs to the \options.
 577     That can be done running is_document_option(document, option)."""
 578
 579     options_line = find_token(document.header, "\\options", 0)
 580     option_pos = document.header[options_line].find(option)
 581
 582     # Remove option from \options
 583     comma_before_pos = document.header[options_line].rfind(',', 0, option_pos)
 584     comma_after_pos  = document.header[options_line].find(',', option_pos)
 585
 586     # if there are no commas then it is the single option
 587     # and the options line should be removed since it will be empty
 588     if comma_before_pos == comma_after_pos == -1:
 589         del document.header[options_line]
 590         return
 591
 592     # last option
 593     options = document.header[options_line]
 594     if comma_after_pos == -1:
 595         document.header[options_line] = options[:comma_before_pos].rsplit()
 596         return
 597
 598     document.header[options_line] = options[comma_before_pos: comma_after_pos]
 599
 600
 601 def is_document_option(document, option):
 602     "Find if _option_ is a document option"
 603
 604     # Find \options in the header
 605     options_line = find_token(document.header, "\\options", 0)
 606
 607     # \options is not present in the header
 608     if options_line == -1:
 609         return False
 610
 611     option_pos = document.header[options_line].find(option)
 612     # option is not present in the \options
 613     if option_pos == -1:
 614         return False
 615
 616     return True