lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 '''
  20 This module offers several free functions to help with lyx2lyx'ing.
   21 More documentation is below, but here is a quick guide to what
  22 they do. Optional arguments are marked by brackets.
  23
  24 add_to_preamble(document, text):
  25   Here, text can be either a single line or a list of lines. It
  26   is bad practice to pass something with embedded newlines, but
  27   we will handle that properly.
  28   The routine checks to see whether the provided material is
  29   already in the preamble. If not, it adds it.
  30   Prepends a comment "% Added by lyx2lyx" to text.
  31
  32 insert_to_preamble(document, text[, index]):
  33   Here, text can be either a single line or a list of lines. It
  34   is bad practice to pass something with embedded newlines, but
  35   we will handle that properly.
  36   The routine inserts text at document.preamble[index], where by
  37   default index is 0, so the material is inserted at the beginning.
  38   Prepends a comment "% Added by lyx2lyx" to text.
  39
  40 put_cmd_in_ert(arg):
  41   Here arg should be a list of strings (lines), which we want to
  42   wrap in ERT. Returns a list of strings so wrapped.
  43   A call to this routine will often go something like this:
  44     i = find_token('\\begin_inset FunkyInset', ...)
  45     j = find_end_of_inset(document.body, i)
   46     content = lyx2latex(document, document.body[i:j + 1])
  47     ert = put_cmd_in_ert(content)
  48     document.body[i:j+1] = ert
  49
  50 get_ert(lines, i[, verbatim]):
  51   Here, lines is a list of lines of LyX material containing an ERT inset,
  52   whose content we want to convert to LaTeX. The ERT starts at index i.
  53   If the optional (by default: False) bool verbatim is True, the content
  54   of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
  55   for the use in verbatim insets.
  56
  57 lyx2latex(document, lines):
  58   Here, lines is a list of lines of LyX material we want to convert
  59   to LaTeX. We do the best we can and return a string containing
  60   the translated material.
  61
  62 lyx2verbatim(document, lines):
  63   Here, lines is a list of lines of LyX material we want to convert
   64   to verbatim material (used in ERT and the like). We do the best we
  65   can and return a string containing the translated material.
  66
  67 latex_length(slen):
  68     Convert lengths (in LyX form) to their LaTeX representation. Returns
  69     (bool, length), where the bool tells us if it was a percentage, and
  70     the length is the LaTeX representation.
  71
  72 '''
  73
  74 import re
  75 import string
  76 from parser_tools import find_token, find_end_of_inset
  77 from unicode_symbols import unicode_reps
  78
  79
  80 # This will accept either a list of lines or a single line.
  81 # It is bad practice to pass something with embedded newlines,
  82 # though we will handle that.
  83 def add_to_preamble(document, text):
  84     " Add text to the preamble if it is not already there. "
  85
  86     if not type(text) is list:
  87       # split on \n just in case
  88       # it'll give us the one element list we want
  89       # if there's no \n, too
  90       text = text.split('\n')
  91
  92     i = 0
  93     prelen = len(document.preamble)
  94     while True:
  95       i = find_token(document.preamble, text[0], i)
  96       if i == -1:
  97         break
  98       # we need a perfect match
  99       matched = True
 100       for line in text:
 101         if i >= prelen or line != document.preamble[i]:
 102           matched = False
 103           break
 104         i += 1
 105       if matched:
 106         return
 107
 108     document.preamble.extend(["% Added by lyx2lyx"])
 109     document.preamble.extend(text)
 110
 111
 112 # Note that text can be either a list of lines or a single line.
 113 # It should really be a list.
 114 def insert_to_preamble(document, text, index = 0):
 115     """ Insert text to the preamble at a given line"""
 116
 117     if not type(text) is list:
 118       # split on \n just in case
 119       # it'll give us the one element list we want
 120       # if there's no \n, too
 121       text = text.split('\n')
 122
 123     text.insert(0, "% Added by lyx2lyx")
 124     document.preamble[index:index] = text
 125
 126
 127 def put_cmd_in_ert(arg):
 128     '''
 129     arg should be a list of lines we want to wrap in ERT.
 130     Returns a list of strings, with the lines so wrapped.
 131     '''
 132
 133     ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
 134     # It will be faster for us to work with a single string internally.
 135     # That way, we only go through the unicode_reps loop once.
 136     if type(arg) is list:
 137       s = "\n".join(arg)
 138     else:
 139       s = arg
 140     for rep in unicode_reps:
 141       s = s.replace(rep[1], rep[0])
 142     s = s.replace('\\', "\\backslash\n")
 143     ret += s.splitlines()
 144     ret += ["\\end_layout", "", "\\end_inset"]
 145     return ret
 146
 147
 148 def get_ert(lines, i, verbatim = False):
 149     'Convert an ERT inset into LaTeX.'
 150     if not lines[i].startswith("\\begin_inset ERT"):
 151         return ""
 152     j = find_end_of_inset(lines, i)
 153     if j == -1:
 154         return ""
 155     while i < j and not lines[i].startswith("status"):
 156         i = i + 1
 157     i = i + 1
 158     ret = ""
 159     first = True
 160     while i < j:
 161         if lines[i] == "\\begin_layout Plain Layout":
 162             if first:
 163                 first = False
 164             else:
 165                 ret = ret + "\n"
 166             while i + 1 < j and lines[i+1] == "":
 167                 i = i + 1
 168         elif lines[i] == "\\end_layout":
 169             while i + 1 < j and lines[i+1] == "":
 170                 i = i + 1
 171         elif lines[i] == "\\backslash":
 172             if verbatim:
 173                 ret = ret + "\n" + lines[i] + "\n"
 174             else:
 175                 ret = ret + "\\"
 176         else:
 177             ret = ret + lines[i]
 178         i = i + 1
 179     return ret
 180
 181
 182 def lyx2latex(document, lines):
 183     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 184
 185     content = ""
 186     ert_end = 0
 187     note_end = 0
 188     hspace = ""
 189
 190     for curline in range(len(lines)):
 191       line = lines[curline]
 192       if line.startswith("\\begin_inset Note Note"):
 193           # We want to skip LyX notes, so remember where the inset ends
 194           note_end = find_end_of_inset(lines, curline + 1)
 195           continue
 196       elif note_end >= curline:
 197           # Skip LyX notes
 198           continue
 199       elif line.startswith("\\begin_inset ERT"):
 200           # We don't want to replace things inside ERT, so figure out
 201           # where the end of the inset is.
 202           ert_end = find_end_of_inset(lines, curline + 1)
 203           continue
 204       elif line.startswith("\\begin_inset Formula"):
 205           line = line[20:]
 206       elif line.startswith("\\begin_inset Quotes"):
 207           # For now, we do a very basic reversion. Someone who understands
 208           # quotes is welcome to fix it up.
 209           qtype = line[20:].strip()
 210           # lang = qtype[0]
 211           side = qtype[1]
 212           dbls = qtype[2]
 213           if side == "l":
 214               if dbls == "d":
 215                   line = "``"
 216               else:
 217                   line = "`"
 218           else:
 219               if dbls == "d":
 220                   line = "''"
 221               else:
 222                   line = "'"
 223       elif line.startswith("\\begin_inset Newline newline"):
 224           line = "\\\\ "
 225       elif line.startswith("\\noindent"):
 226           line = "\\noindent " # we need the space behind the command
 227       elif line.startswith("\\begin_inset space"):
 228           line = line[18:].strip()
 229           if line.startswith("\\hspace"):
 230               # Account for both \hspace and \hspace*
 231               hspace = line[:-2]
 232               continue
 233           elif line == "\\space{}":
 234               line = "\\ "
 235           elif line == "\\thinspace{}":
 236               line = "\\,"
 237       elif hspace != "":
 238           # The LyX length is in line[8:], after the \length keyword
 239           length = latex_length(line[8:])[1]
 240           line = hspace + "{" + length + "}"
 241           hspace = ""
 242       elif line.isspace() or \
 243             line.startswith("\\begin_layout") or \
 244             line.startswith("\\end_layout") or \
 245             line.startswith("\\begin_inset") or \
 246             line.startswith("\\end_inset") or \
 247             line.startswith("\\lang") or \
 248             line.strip() == "status collapsed" or \
 249             line.strip() == "status open":
 250           #skip all that stuff
 251           continue
 252
 253       # this needs to be added to the preamble because of cases like
 254       # \textmu, \textbackslash, etc.
 255       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 256                                  '\\@ifundefined{textmu}',
 257                                  ' {\\usepackage{textcomp}}{}'])
 258       # a lossless reversion is not possible
 259       # try at least to handle some common insets and settings
 260       if ert_end >= curline:
 261           line = line.replace(r'\backslash', '\\')
 262       else:
 263           # No need to add "{}" after single-nonletter macros
 264           line = line.replace('&', '\\&')
 265           line = line.replace('#', '\\#')
 266           line = line.replace('^', '\\textasciicircum{}')
 267           line = line.replace('%', '\\%')
 268           line = line.replace('_', '\\_')
 269           line = line.replace('$', '\\$')
 270
 271           # Do the LyX text --> LaTeX conversion
 272           for rep in unicode_reps:
 273               line = line.replace(rep[1], rep[0])
 274           line = line.replace(r'\backslash', r'\textbackslash{}')
 275           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 276           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 277           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 278           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 279           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 280           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 281           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 282           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 283           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 284       content += line
 285     return content
 286
 287
 288 def lyx2verbatim(document, lines):
 289     'Convert some LyX stuff into corresponding verbatim stuff, as best we can.'
 290
 291     content = lyx2latex(document, lines)
 292     content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content)
 293
 294     return content
 295
 296
 297 def latex_length(slen):
 298     '''
 299     Convert lengths to their LaTeX representation. Returns (bool, length),
 300     where the bool tells us if it was a percentage, and the length is the
 301     LaTeX representation.
 302     '''
 303     i = 0
 304     percent = False
 305     # the slen has the form
 306     # ValueUnit+ValueUnit-ValueUnit or
 307     # ValueUnit+-ValueUnit
 308     # the + and - (glue lengths) are optional
 309     # the + always precedes the -
 310
 311     # Convert relative lengths to LaTeX units
 312     units = {"text%":"\\textwidth", "col%":"\\columnwidth",
 313              "page%":"\\paperwidth", "line%":"\\linewidth",
 314              "theight%":"\\textheight", "pheight%":"\\paperheight"}
 315     for unit in list(units.keys()):
 316         i = slen.find(unit)
 317         if i == -1:
 318             continue
 319         percent = True
 320         minus = slen.rfind("-", 1, i)
 321         plus = slen.rfind("+", 0, i)
 322         latex_unit = units[unit]
 323         if plus == -1 and minus == -1:
 324             value = slen[:i]
 325             value = str(float(value)/100)
 326             end = slen[i + len(unit):]
 327             slen = value + latex_unit + end
 328         if plus > minus:
 329             value = slen[plus + 1:i]
 330             value = str(float(value)/100)
 331             begin = slen[:plus + 1]
 332             end = slen[i+len(unit):]
 333             slen = begin + value + latex_unit + end
 334         if plus < minus:
 335             value = slen[minus + 1:i]
 336             value = str(float(value)/100)
 337             begin = slen[:minus + 1]
 338             slen = begin + value + latex_unit
 339
 340     # replace + and -, but only if the - is not the first character
 341     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 342     # handle the case where "+-1mm" was used, because LaTeX only understands
 343     # "plus 1mm minus 1mm"
 344     if slen.find("plus  minus"):
 345         lastvaluepos = slen.rfind(" ")
 346         lastvalue = slen[lastvaluepos:]
 347         slen = slen.replace("  ", lastvalue + " ")
 348     return (percent, slen)
 349
 350
 351 def length_in_bp(length):
 352     " Convert a length in LyX format to its value in bp units "
 353
 354     em_width = 10.0 / 72.27 # assume 10pt font size
 355     text_width = 8.27 / 1.7 # assume A4 with default margins
 356     # scale factors are taken from Length::inInch()
 357     scales = {"bp"       : 1.0,
 358               "cc"       : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
 359               "cm"       : (72.0 / 2.54),
 360               "dd"       : (72.0 / (72.27 / (0.376 * 2.845))),
 361               "em"       : (72.0 * em_width),
 362               "ex"       : (72.0 * em_width * 0.4305),
 363               "in"       : 72.0,
 364               "mm"       : (72.0 / 25.4),
 365               "mu"       : (72.0 * em_width / 18.0),
 366               "pc"       : (72.0 / (72.27 / 12.0)),
 367               "pt"       : (72.0 / (72.27)),
 368               "sp"       : (72.0 / (72.27 * 65536.0)),
 369               "text%"    : (72.0 * text_width / 100.0),
 370               "col%"     : (72.0 * text_width / 100.0), # assume 1 column
 371               "page%"    : (72.0 * text_width * 1.7 / 100.0),
 372               "line%"    : (72.0 * text_width / 100.0),
 373               "theight%" : (72.0 * text_width * 1.787 / 100.0),
 374               "pheight%" : (72.0 * text_width * 2.2 / 100.0)}
 375
 376     rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$')
 377     m = rx.match(length)
 378     if not m:
 379         document.warning("Invalid length value: " + length + ".")
 380         return 0
 381     value = m.group(1)
 382     unit = m.group(2)
 383     if not unit in scales.keys():
 384         document.warning("Unknown length unit: " + unit + ".")
 385         return value
 386     return "%g" % (float(value) * scales[unit])
 387
 388
 389 def revert_flex_inset(lines, name, LaTeXname):
 390   " Convert flex insets to TeX code "
 391   i = 0
 392   while True:
 393     i = find_token(lines, '\\begin_inset Flex ' + name, i)
 394     if i == -1:
 395       return
 396     z = find_end_of_inset(lines, i)
 397     if z == -1:
 398       document.warning("Can't find end of Flex " + name + " inset.")
 399       i += 1
 400       continue
 401     # remove the \end_inset
 402     lines[z - 2:z + 1] = put_cmd_in_ert("}")
 403     # we need to reset character layouts if necessary
 404     j = find_token(lines, '\\emph on', i, z)
 405     k = find_token(lines, '\\noun on', i, z)
 406     l = find_token(lines, '\\series', i, z)
 407     m = find_token(lines, '\\family', i, z)
 408     n = find_token(lines, '\\shape', i, z)
 409     o = find_token(lines, '\\color', i, z)
 410     p = find_token(lines, '\\size', i, z)
 411     q = find_token(lines, '\\bar under', i, z)
 412     r = find_token(lines, '\\uuline on', i, z)
 413     s = find_token(lines, '\\uwave on', i, z)
 414     t = find_token(lines, '\\strikeout on', i, z)
 415     if j != -1:
 416       lines.insert(z - 2, "\\emph default")
 417     if k != -1:
 418       lines.insert(z - 2, "\\noun default")
 419     if l != -1:
 420       lines.insert(z - 2, "\\series default")
 421     if m != -1:
 422       lines.insert(z - 2, "\\family default")
 423     if n != -1:
 424       lines.insert(z - 2, "\\shape default")
 425     if o != -1:
 426       lines.insert(z - 2, "\\color inherit")
 427     if p != -1:
 428       lines.insert(z - 2, "\\size default")
 429     if q != -1:
 430       lines.insert(z - 2, "\\bar default")
 431     if r != -1:
 432       lines.insert(z - 2, "\\uuline default")
 433     if s != -1:
 434       lines.insert(z - 2, "\\uwave default")
 435     if t != -1:
 436       lines.insert(z - 2, "\\strikeout default")
 437     lines[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 438     i += 1
 439
 440
 441 def revert_font_attrs(lines, name, LaTeXname):
 442   " Reverts font changes to TeX code "
 443   i = 0
 444   changed = False
 445   while True:
 446     i = find_token(lines, name + ' on', i)
 447     if i == -1:
 448       return changed
 449     j = find_token(lines, name + ' default', i)
 450     k = find_token(lines, name + ' on', i + 1)
 451     # if there is no default set, the style ends with the layout
 452     # assure hereby that we found the correct layout end
 453     if j != -1 and (j < k or k == -1):
 454       lines[j:j + 1] = put_cmd_in_ert("}")
 455     else:
 456       j = find_token(lines, '\\end_layout', i)
 457       lines[j:j] = put_cmd_in_ert("}")
 458     lines[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 459     changed = True
 460     i += 1
 461
 462
 463 def revert_layout_command(lines, name, LaTeXname):
 464   " Reverts a command from a layout to TeX code "
 465   i = 0
 466   while True:
 467     i = find_token(lines, '\\begin_layout ' + name, i)
 468     if i == -1:
 469       return
 470     k = -1
 471     # find the next layout
 472     j = i + 1
 473     while k == -1:
 474       j = find_token(lines, '\\begin_layout', j)
 475       l = len(lines)
 476       # if nothing was found it was the last layout of the document
 477       if j == -1:
 478         lines[l - 4:l - 4] = put_cmd_in_ert("}")
 479         k = 0
 480       # exclude plain layout because this can be TeX code or another inset
 481       elif lines[j] != '\\begin_layout Plain Layout':
 482         lines[j - 2:j - 2] = put_cmd_in_ert("}")
 483         k = 0
 484       else:
 485         j += 1
 486     lines[i] = '\\begin_layout Standard'
 487     lines[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 488     i += 1
 489
 490
 491 def hex2ratio(s):
 492   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 493   try:
 494     val = int(s, 16)
 495   except:
 496     val = 0
 497   if val != 0:
 498     val += 1
 499   return str(val / 256.0)
 500
 501
 502 def str2bool(s):
 503   "'true' goes to True, case-insensitively, and we strip whitespace."
 504   s = s.strip().lower()
 505   return s == "true"