lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 '''
  20 This module offers several free functions to help with lyx2lyx'ing.
  21 More documentation is below, but here is a quick guide to what
  22 they do. Optional arguments are marked by brackets.
  23
  24 add_to_preamble(document, text):
  25   Here, text can be either a single line or a list of lines. It
  26   is bad practice to pass something with embedded newlines, but
  27   we will handle that properly.
  28   The routine checks to see whether the provided material is
  29   already in the preamble. If not, it adds it.
  30   Prepends a comment "% Added by lyx2lyx" to text.
  31
  32 insert_to_preamble(document, text[, index]):
  33   Here, text can be either a single line or a list of lines. It
  34   is bad practice to pass something with embedded newlines, but
  35   we will handle that properly.
  36   The routine inserts text at document.preamble[index], where by
  37   default index is 0, so the material is inserted at the beginning.
  38   Prepends a comment "% Added by lyx2lyx" to text.
  39
  40 put_cmd_in_ert(arg):
  41   Here arg should be a list of strings (lines), which we want to
  42   wrap in ERT. Returns a list of strings so wrapped.
  43   A call to this routine will often go something like this:
  44     i = find_token('\\begin_inset FunkyInset', ...)
  45     j = find_end_of_inset(document.body, i)
  46     content = lyx2latex(document, document.body[i:j + 1])
  47     ert = put_cmd_in_ert(content)
  48     document.body[i:j+1] = ert
  49
  50 get_ert(lines, i[, verbatim]):
  51   Here, lines is a list of lines of LyX material containing an ERT inset,
  52   whose content we want to convert to LaTeX. The ERT starts at index i.
  53   If the optional (by default: False) bool verbatim is True, the content
  54   of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
  55   for the use in verbatim insets.
  56
  57 lyx2latex(document, lines):
  58   Here, lines is a list of lines of LyX material we want to convert
  59   to LaTeX. We do the best we can and return a string containing
  60   the translated material.
  61
  62 lyx2verbatim(document, lines):
  63   Here, lines is a list of lines of LyX material we want to convert
  64   to verbatim material (used in ERT and the like). We do the best we
  65   can and return a string containing the translated material.
  66
  67 latex_length(slen):
  68     Convert lengths (in LyX form) to their LaTeX representation. Returns
  69     (bool, length), where the bool tells us if it was a percentage, and
  70     the length is the LaTeX representation.
  71
  72 convert_info_insets(document, type, func):
  73     Applies func to the argument of all info insets matching certain types
  74     type : the type to match. This can be a regular expression.
  75     func : function from string to string to apply to the "arg" field of
  76            the info insets.
  77 '''
  78
  79 import re
  80 import string
  81 from parser_tools import find_token, find_end_of_inset
  82 from unicode_symbols import unicode_reps
  83
  84
  85 # This will accept either a list of lines or a single line.
  86 # It is bad practice to pass something with embedded newlines,
  87 # though we will handle that.
  88 def add_to_preamble(document, text):
  89     " Add text to the preamble if it is not already there. "
  90
  91     if not type(text) is list:
  92       # split on \n just in case
  93       # it'll give us the one element list we want
  94       # if there's no \n, too
  95       text = text.split('\n')
  96
  97     i = 0
  98     prelen = len(document.preamble)
  99     while True:
 100       i = find_token(document.preamble, text[0], i)
 101       if i == -1:
 102         break
 103       # we need a perfect match
 104       matched = True
 105       for line in text:
 106         if i >= prelen or line != document.preamble[i]:
 107           matched = False
 108           break
 109         i += 1
 110       if matched:
 111         return
 112
 113     document.preamble.extend(["% Added by lyx2lyx"])
 114     document.preamble.extend(text)
 115
 116
 117 # Note that text can be either a list of lines or a single line.
 118 # It should really be a list.
 119 def insert_to_preamble(document, text, index = 0):
 120     """ Insert text to the preamble at a given line"""
 121
 122     if not type(text) is list:
 123       # split on \n just in case
 124       # it'll give us the one element list we want
 125       # if there's no \n, too
 126       text = text.split('\n')
 127
 128     text.insert(0, "% Added by lyx2lyx")
 129     document.preamble[index:index] = text
 130
 131
 132 def put_cmd_in_ert(arg):
 133     '''
 134     arg should be a list of lines we want to wrap in ERT.
 135     Returns a list of strings, with the lines so wrapped.
 136     '''
 137
 138     ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
 139     # It will be faster for us to work with a single string internally.
 140     # That way, we only go through the unicode_reps loop once.
 141     if type(arg) is list:
 142       s = "\n".join(arg)
 143     else:
 144       s = arg
 145     for rep in unicode_reps:
 146       s = s.replace(rep[1], rep[0])
 147     s = s.replace('\\', "\\backslash\n")
 148     ret += s.splitlines()
 149     ret += ["\\end_layout", "", "\\end_inset"]
 150     return ret
 151
 152
 153 def get_ert(lines, i, verbatim = False):
 154     'Convert an ERT inset into LaTeX.'
 155     if not lines[i].startswith("\\begin_inset ERT"):
 156         return ""
 157     j = find_end_of_inset(lines, i)
 158     if j == -1:
 159         return ""
 160     while i < j and not lines[i].startswith("status"):
 161         i = i + 1
 162     i = i + 1
 163     ret = ""
 164     first = True
 165     while i < j:
 166         if lines[i] == "\\begin_layout Plain Layout":
 167             if first:
 168                 first = False
 169             else:
 170                 ret = ret + "\n"
 171             while i + 1 < j and lines[i+1] == "":
 172                 i = i + 1
 173         elif lines[i] == "\\end_layout":
 174             while i + 1 < j and lines[i+1] == "":
 175                 i = i + 1
 176         elif lines[i] == "\\backslash":
 177             if verbatim:
 178                 ret = ret + "\n" + lines[i] + "\n"
 179             else:
 180                 ret = ret + "\\"
 181         else:
 182             ret = ret + lines[i]
 183         i = i + 1
 184     return ret
 185
 186
 187 def lyx2latex(document, lines):
 188     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 189
 190     content = ""
 191     ert_end = 0
 192     note_end = 0
 193     hspace = ""
 194
 195     for curline in range(len(lines)):
 196       line = lines[curline]
 197       if line.startswith("\\begin_inset Note Note"):
 198           # We want to skip LyX notes, so remember where the inset ends
 199           note_end = find_end_of_inset(lines, curline + 1)
 200           continue
 201       elif note_end >= curline:
 202           # Skip LyX notes
 203           continue
 204       elif line.startswith("\\begin_inset ERT"):
 205           # We don't want to replace things inside ERT, so figure out
 206           # where the end of the inset is.
 207           ert_end = find_end_of_inset(lines, curline + 1)
 208           continue
 209       elif line.startswith("\\begin_inset Formula"):
 210           line = line[20:]
 211       elif line.startswith("\\begin_inset Quotes"):
 212           # For now, we do a very basic reversion. Someone who understands
 213           # quotes is welcome to fix it up.
 214           qtype = line[20:].strip()
 215           # lang = qtype[0]
 216           side = qtype[1]
 217           dbls = qtype[2]
 218           if side == "l":
 219               if dbls == "d":
 220                   line = "``"
 221               else:
 222                   line = "`"
 223           else:
 224               if dbls == "d":
 225                   line = "''"
 226               else:
 227                   line = "'"
 228       elif line.startswith("\\begin_inset Newline newline"):
 229           line = "\\\\ "
 230       elif line.startswith("\\noindent"):
 231           line = "\\noindent " # we need the space behind the command
 232       elif line.startswith("\\begin_inset space"):
 233           line = line[18:].strip()
 234           if line.startswith("\\hspace"):
 235               # Account for both \hspace and \hspace*
 236               hspace = line[:-2]
 237               continue
 238           elif line == "\\space{}":
 239               line = "\\ "
 240           elif line == "\\thinspace{}":
 241               line = "\\,"
 242       elif hspace != "":
 243           # The LyX length is in line[8:], after the \length keyword
 244           length = latex_length(line[8:])[1]
 245           line = hspace + "{" + length + "}"
 246           hspace = ""
 247       elif line.isspace() or \
 248             line.startswith("\\begin_layout") or \
 249             line.startswith("\\end_layout") or \
 250             line.startswith("\\begin_inset") or \
 251             line.startswith("\\end_inset") or \
 252             line.startswith("\\lang") or \
 253             line.strip() == "status collapsed" or \
 254             line.strip() == "status open":
 255           #skip all that stuff
 256           continue
 257
 258       # this needs to be added to the preamble because of cases like
 259       # \textmu, \textbackslash, etc.
 260       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 261                                  '\\@ifundefined{textmu}',
 262                                  ' {\\usepackage{textcomp}}{}'])
 263       # a lossless reversion is not possible
 264       # try at least to handle some common insets and settings
 265       if ert_end >= curline:
 266           line = line.replace(r'\backslash', '\\')
 267       else:
 268           # No need to add "{}" after single-nonletter macros
 269           line = line.replace('&', '\\&')
 270           line = line.replace('#', '\\#')
 271           line = line.replace('^', '\\textasciicircum{}')
 272           line = line.replace('%', '\\%')
 273           line = line.replace('_', '\\_')
 274           line = line.replace('$', '\\$')
 275
 276           # Do the LyX text --> LaTeX conversion
 277           for rep in unicode_reps:
 278               line = line.replace(rep[1], rep[0])
 279           line = line.replace(r'\backslash', r'\textbackslash{}')
 280           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 281           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 282           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 283           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 284           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 285           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 286           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 287           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 288           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 289       content += line
 290     return content
 291
 292
 293 def lyx2verbatim(document, lines):
 294     'Convert some LyX stuff into corresponding verbatim stuff, as best we can.'
 295
 296     content = lyx2latex(document, lines)
 297     content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content)
 298
 299     return content
 300
 301
 302 def latex_length(slen):
 303     '''
 304     Convert lengths to their LaTeX representation. Returns (bool, length),
 305     where the bool tells us if it was a percentage, and the length is the
 306     LaTeX representation.
 307     '''
 308     i = 0
 309     percent = False
 310     # the slen has the form
 311     # ValueUnit+ValueUnit-ValueUnit or
 312     # ValueUnit+-ValueUnit
 313     # the + and - (glue lengths) are optional
 314     # the + always precedes the -
 315
 316     # Convert relative lengths to LaTeX units
 317     units = {"text%":"\\textwidth", "col%":"\\columnwidth",
 318              "page%":"\\paperwidth", "line%":"\\linewidth",
 319              "theight%":"\\textheight", "pheight%":"\\paperheight"}
 320     for unit in list(units.keys()):
 321         i = slen.find(unit)
 322         if i == -1:
 323             continue
 324         percent = True
 325         minus = slen.rfind("-", 1, i)
 326         plus = slen.rfind("+", 0, i)
 327         latex_unit = units[unit]
 328         if plus == -1 and minus == -1:
 329             value = slen[:i]
 330             value = str(float(value)/100)
 331             end = slen[i + len(unit):]
 332             slen = value + latex_unit + end
 333         if plus > minus:
 334             value = slen[plus + 1:i]
 335             value = str(float(value)/100)
 336             begin = slen[:plus + 1]
 337             end = slen[i+len(unit):]
 338             slen = begin + value + latex_unit + end
 339         if plus < minus:
 340             value = slen[minus + 1:i]
 341             value = str(float(value)/100)
 342             begin = slen[:minus + 1]
 343             slen = begin + value + latex_unit
 344
 345     # replace + and -, but only if the - is not the first character
 346     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 347     # handle the case where "+-1mm" was used, because LaTeX only understands
 348     # "plus 1mm minus 1mm"
 349     if slen.find("plus  minus"):
 350         lastvaluepos = slen.rfind(" ")
 351         lastvalue = slen[lastvaluepos:]
 352         slen = slen.replace("  ", lastvalue + " ")
 353     return (percent, slen)
 354
 355
 356 def length_in_bp(length):
 357     " Convert a length in LyX format to its value in bp units "
 358
 359     em_width = 10.0 / 72.27 # assume 10pt font size
 360     text_width = 8.27 / 1.7 # assume A4 with default margins
 361     # scale factors are taken from Length::inInch()
 362     scales = {"bp"       : 1.0,
 363               "cc"       : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
 364               "cm"       : (72.0 / 2.54),
 365               "dd"       : (72.0 / (72.27 / (0.376 * 2.845))),
 366               "em"       : (72.0 * em_width),
 367               "ex"       : (72.0 * em_width * 0.4305),
 368               "in"       : 72.0,
 369               "mm"       : (72.0 / 25.4),
 370               "mu"       : (72.0 * em_width / 18.0),
 371               "pc"       : (72.0 / (72.27 / 12.0)),
 372               "pt"       : (72.0 / (72.27)),
 373               "sp"       : (72.0 / (72.27 * 65536.0)),
 374               "text%"    : (72.0 * text_width / 100.0),
 375               "col%"     : (72.0 * text_width / 100.0), # assume 1 column
 376               "page%"    : (72.0 * text_width * 1.7 / 100.0),
 377               "line%"    : (72.0 * text_width / 100.0),
 378               "theight%" : (72.0 * text_width * 1.787 / 100.0),
 379               "pheight%" : (72.0 * text_width * 2.2 / 100.0)}
 380
 381     rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$')
 382     m = rx.match(length)
 383     if not m:
 384         document.warning("Invalid length value: " + length + ".")
 385         return 0
 386     value = m.group(1)
 387     unit = m.group(2)
 388     if not unit in scales.keys():
 389         document.warning("Unknown length unit: " + unit + ".")
 390         return value
 391     return "%g" % (float(value) * scales[unit])
 392
 393
 394 def revert_flex_inset(lines, name, LaTeXname):
 395   " Convert flex insets to TeX code "
 396   i = 0
 397   while True:
 398     i = find_token(lines, '\\begin_inset Flex ' + name, i)
 399     if i == -1:
 400       return
 401     z = find_end_of_inset(lines, i)
 402     if z == -1:
 403       document.warning("Can't find end of Flex " + name + " inset.")
 404       i += 1
 405       continue
 406     # remove the \end_inset
 407     lines[z - 2:z + 1] = put_cmd_in_ert("}")
 408     # we need to reset character layouts if necessary
 409     j = find_token(lines, '\\emph on', i, z)
 410     k = find_token(lines, '\\noun on', i, z)
 411     l = find_token(lines, '\\series', i, z)
 412     m = find_token(lines, '\\family', i, z)
 413     n = find_token(lines, '\\shape', i, z)
 414     o = find_token(lines, '\\color', i, z)
 415     p = find_token(lines, '\\size', i, z)
 416     q = find_token(lines, '\\bar under', i, z)
 417     r = find_token(lines, '\\uuline on', i, z)
 418     s = find_token(lines, '\\uwave on', i, z)
 419     t = find_token(lines, '\\strikeout on', i, z)
 420     if j != -1:
 421       lines.insert(z - 2, "\\emph default")
 422     if k != -1:
 423       lines.insert(z - 2, "\\noun default")
 424     if l != -1:
 425       lines.insert(z - 2, "\\series default")
 426     if m != -1:
 427       lines.insert(z - 2, "\\family default")
 428     if n != -1:
 429       lines.insert(z - 2, "\\shape default")
 430     if o != -1:
 431       lines.insert(z - 2, "\\color inherit")
 432     if p != -1:
 433       lines.insert(z - 2, "\\size default")
 434     if q != -1:
 435       lines.insert(z - 2, "\\bar default")
 436     if r != -1:
 437       lines.insert(z - 2, "\\uuline default")
 438     if s != -1:
 439       lines.insert(z - 2, "\\uwave default")
 440     if t != -1:
 441       lines.insert(z - 2, "\\strikeout default")
 442     lines[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 443     i += 1
 444
 445
 446 def revert_font_attrs(lines, name, LaTeXname):
 447   " Reverts font changes to TeX code "
 448   i = 0
 449   changed = False
 450   while True:
 451     i = find_token(lines, name + ' on', i)
 452     if i == -1:
 453       return changed
 454     j = find_token(lines, name + ' default', i)
 455     k = find_token(lines, name + ' on', i + 1)
 456     # if there is no default set, the style ends with the layout
 457     # assure hereby that we found the correct layout end
 458     if j != -1 and (j < k or k == -1):
 459       lines[j:j + 1] = put_cmd_in_ert("}")
 460     else:
 461       j = find_token(lines, '\\end_layout', i)
 462       lines[j:j] = put_cmd_in_ert("}")
 463     lines[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 464     changed = True
 465     i += 1
 466
 467
 468 def revert_layout_command(lines, name, LaTeXname):
 469   " Reverts a command from a layout to TeX code "
 470   i = 0
 471   while True:
 472     i = find_token(lines, '\\begin_layout ' + name, i)
 473     if i == -1:
 474       return
 475     k = -1
 476     # find the next layout
 477     j = i + 1
 478     while k == -1:
 479       j = find_token(lines, '\\begin_layout', j)
 480       l = len(lines)
 481       # if nothing was found it was the last layout of the document
 482       if j == -1:
 483         lines[l - 4:l - 4] = put_cmd_in_ert("}")
 484         k = 0
 485       # exclude plain layout because this can be TeX code or another inset
 486       elif lines[j] != '\\begin_layout Plain Layout':
 487         lines[j - 2:j - 2] = put_cmd_in_ert("}")
 488         k = 0
 489       else:
 490         j += 1
 491     lines[i] = '\\begin_layout Standard'
 492     lines[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 493     i += 1
 494
 495
 496 def hex2ratio(s):
 497   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 498   try:
 499     val = int(s, 16)
 500   except:
 501     val = 0
 502   if val != 0:
 503     val += 1
 504   return str(val / 256.0)
 505
 506
 507 def str2bool(s):
 508   "'true' goes to True, case-insensitively, and we strip whitespace."
 509   s = s.strip().lower()
 510   return s == "true"
 511
 512
 513 def convert_info_insets(document, type, func):
 514     "Convert info insets matching type using func."
 515     i = 0
 516     type_re = re.compile(r'^type\s+"(%s)"$' % type)
 517     arg_re = re.compile(r'^arg\s+"(.*)"$')
 518     while True:
 519         i = find_token(document.body, "\\begin_inset Info", i)
 520         if i == -1:
 521             return
 522         t = type_re.match(document.body[i + 1])
 523         if t:
 524             arg = arg_re.match(document.body[i + 2])
 525             if arg:
 526                 new_arg = func(arg.group(1))
 527                 document.body[i + 2] = 'arg   "%s"' % new_arg
 528         i += 3