lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 '''
  20 This module offers several free functions to help with lyx2lyx'ing.
  21 More documentation is below, but here is a quick guide to what
  22 they do. Optional arguments are marked by brackets.
  23
  24 add_to_preamble(document, text):
  25   Here, text can be either a single line or a list of lines. It
  26   is bad practice to pass something with embedded newlines, but
  27   we will handle that properly.
  28   The routine checks to see whether the provided material is
  29   already in the preamble. If not, it adds it.
  30   Prepends a comment "% Added by lyx2lyx" to text.
  31
  32 insert_to_preamble(document, text[, index]):
  33   Here, text can be either a single line or a list of lines. It
  34   is bad practice to pass something with embedded newlines, but
  35   we will handle that properly.
  36   The routine inserts text at document.preamble[index], where by
  37   default index is 0, so the material is inserted at the beginning.
  38   Prepends a comment "% Added by lyx2lyx" to text.
  39
  40 put_cmd_in_ert(arg):
  41   Here arg should be a list of strings (lines), which we want to
  42   wrap in ERT. Returns a list of strings so wrapped.
  43   A call to this routine will often go something like this:
  44     i = find_token('\\begin_inset FunkyInset', ...)
  45     j = find_end_of_inset(document.body, i)
  46     content = lyx2latex(document, document.body[i:j + 1])
  47     ert = put_cmd_in_ert(content)
  48     document.body[i:j+1] = ert
  49
  50 lyx2latex(document, lines):
  51   Here, lines is a list of lines of LyX material we want to convert
  52   to LaTeX. We do the best we can and return a string containing
  53   the translated material.
  54
  55 lyx2verbatim(document, lines):
  56   Here, lines is a list of lines of LyX material we want to convert
  57   to verbatim material (used in ERT and the like). We do the best we
  58   can and return a string containing the translated material.
  59
  60 latex_length(slen):
  61     Convert lengths (in LyX form) to their LaTeX representation. Returns
  62     (bool, length), where the bool tells us if it was a percentage, and
  63     the length is the LaTeX representation.
  64
  65 '''
  66
  67 import re
  68 import string
  69 from parser_tools import find_token, find_end_of_inset
  70 from unicode_symbols import unicode_reps
  71
  72
  73 # This will accept either a list of lines or a single line.
  74 # It is bad practice to pass something with embedded newlines,
  75 # though we will handle that.
  76 def add_to_preamble(document, text):
  77     " Add text to the preamble if it is not already there. "
  78
  79     if not type(text) is list:
  80       # split on \n just in case
  81       # it'll give us the one element list we want
  82       # if there's no \n, too
  83       text = text.split('\n')
  84
  85     i = 0
  86     prelen = len(document.preamble)
  87     while True:
  88       i = find_token(document.preamble, text[0], i)
  89       if i == -1:
  90         break
  91       # we need a perfect match
  92       matched = True
  93       for line in text:
  94         if i >= prelen or line != document.preamble[i]:
  95           matched = False
  96           break
  97         i += 1
  98       if matched:
  99         return
 100
 101     document.preamble.extend(["% Added by lyx2lyx"])
 102     document.preamble.extend(text)
 103
 104
 105 # Note that text can be either a list of lines or a single line.
 106 # It should really be a list.
 107 def insert_to_preamble(document, text, index = 0):
 108     """ Insert text to the preamble at a given line"""
 109
 110     if not type(text) is list:
 111       # split on \n just in case
 112       # it'll give us the one element list we want
 113       # if there's no \n, too
 114       text = text.split('\n')
 115
 116     text.insert(0, "% Added by lyx2lyx")
 117     document.preamble[index:index] = text
 118
 119
 120 def put_cmd_in_ert(arg):
 121     '''
 122     arg should be a list of lines we want to wrap in ERT.
 123     Returns a list of strings, with the lines so wrapped.
 124     '''
 125
 126     ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
 127     # It will be faster for us to work with a single string internally.
 128     # That way, we only go through the unicode_reps loop once.
 129     if type(arg) is list:
 130       s = "\n".join(arg)
 131     else:
 132       s = arg
 133     for rep in unicode_reps:
 134       s = s.replace(rep[1], rep[0])
 135     s = s.replace('\\', "\\backslash\n")
 136     ret += s.splitlines()
 137     ret += ["\\end_layout", "", "\\end_inset"]
 138     return ret
 139
 140
 141 def get_ert(lines, i, verbatim = False):
 142     'Convert an ERT inset into LaTeX.'
 143     if not lines[i].startswith("\\begin_inset ERT"):
 144         return ""
 145     j = find_end_of_inset(lines, i)
 146     if j == -1:
 147         return ""
 148     while i < j and not lines[i].startswith("status"):
 149         i = i + 1
 150     i = i + 1
 151     ret = ""
 152     first = True
 153     while i < j:
 154         if lines[i] == "\\begin_layout Plain Layout":
 155             if first:
 156                 first = False
 157             else:
 158                 ret = ret + "\n"
 159             while i + 1 < j and lines[i+1] == "":
 160                 i = i + 1
 161         elif lines[i] == "\\end_layout":
 162             while i + 1 < j and lines[i+1] == "":
 163                 i = i + 1
 164         elif lines[i] == "\\backslash" and not verbatim:
 165             ret = ret + "\\"
 166         else:
 167             ret = ret + lines[i]
 168         i = i + 1
 169     return ret
 170
 171
 172 def lyx2latex(document, lines):
 173     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 174
 175     content = ""
 176     ert_end = 0
 177     note_end = 0
 178     hspace = ""
 179
 180     for curline in range(len(lines)):
 181       line = lines[curline]
 182       if line.startswith("\\begin_inset Note Note"):
 183           # We want to skip LyX notes, so remember where the inset ends
 184           note_end = find_end_of_inset(lines, curline + 1)
 185           continue
 186       elif note_end >= curline:
 187           # Skip LyX notes
 188           continue
 189       elif line.startswith("\\begin_inset ERT"):
 190           # We don't want to replace things inside ERT, so figure out
 191           # where the end of the inset is.
 192           ert_end = find_end_of_inset(lines, curline + 1)
 193           continue
 194       elif line.startswith("\\begin_inset Formula"):
 195           line = line[20:]
 196       elif line.startswith("\\begin_inset Quotes"):
 197           # For now, we do a very basic reversion. Someone who understands
 198           # quotes is welcome to fix it up.
 199           qtype = line[20:].strip()
 200           # lang = qtype[0]
 201           side = qtype[1]
 202           dbls = qtype[2]
 203           if side == "l":
 204               if dbls == "d":
 205                   line = "``"
 206               else:
 207                   line = "`"
 208           else:
 209               if dbls == "d":
 210                   line = "''"
 211               else:
 212                   line = "'"
 213       elif line.startswith("\\begin_inset Newline newline"):
 214           line = "\\\\ "
 215       elif line.startswith("\\noindent"):
 216           line = "\\noindent " # we need the space behind the command
 217       elif line.startswith("\\begin_inset space"):
 218           line = line[18:].strip()
 219           if line.startswith("\\hspace"):
 220               # Account for both \hspace and \hspace*
 221               hspace = line[:-2]
 222               continue
 223           elif line == "\\space{}":
 224               line = "\\ "
 225           elif line == "\\thinspace{}":
 226               line = "\\,"
 227       elif hspace != "":
 228           # The LyX length is in line[8:], after the \length keyword
 229           length = latex_length(line[8:])[1]
 230           line = hspace + "{" + length + "}"
 231           hspace = ""
 232       elif line.isspace() or \
 233             line.startswith("\\begin_layout") or \
 234             line.startswith("\\end_layout") or \
 235             line.startswith("\\begin_inset") or \
 236             line.startswith("\\end_inset") or \
 237             line.startswith("\\lang") or \
 238             line.strip() == "status collapsed" or \
 239             line.strip() == "status open":
 240           #skip all that stuff
 241           continue
 242
 243       # this needs to be added to the preamble because of cases like
 244       # \textmu, \textbackslash, etc.
 245       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 246                                  '\\@ifundefined{textmu}',
 247                                  ' {\\usepackage{textcomp}}{}'])
 248       # a lossless reversion is not possible
 249       # try at least to handle some common insets and settings
 250       if ert_end >= curline:
 251           line = line.replace(r'\backslash', '\\')
 252       else:
 253           # No need to add "{}" after single-nonletter macros
 254           line = line.replace('&', '\\&')
 255           line = line.replace('#', '\\#')
 256           line = line.replace('^', '\\textasciicircum{}')
 257           line = line.replace('%', '\\%')
 258           line = line.replace('_', '\\_')
 259           line = line.replace('$', '\\$')
 260
 261           # Do the LyX text --> LaTeX conversion
 262           for rep in unicode_reps:
 263               line = line.replace(rep[1], rep[0])
 264           line = line.replace(r'\backslash', r'\textbackslash{}')
 265           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 266           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 267           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 268           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 269           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 270           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 271           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 272           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 273           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 274       content += line
 275     return content
 276
 277
 278 def lyx2verbatim(document, lines):
 279     'Convert some LyX stuff into corresponding verbatim stuff, as best we can.'
 280
 281     content = lyx2latex(document, lines)
 282     content = re.sub(r'\\(?!backslash)', r'\n\\backslash\n', content)
 283
 284     return content
 285
 286
 287 def latex_length(slen):
 288     '''
 289     Convert lengths to their LaTeX representation. Returns (bool, length),
 290     where the bool tells us if it was a percentage, and the length is the
 291     LaTeX representation.
 292     '''
 293     i = 0
 294     percent = False
 295     # the slen has the form
 296     # ValueUnit+ValueUnit-ValueUnit or
 297     # ValueUnit+-ValueUnit
 298     # the + and - (glue lengths) are optional
 299     # the + always precedes the -
 300
 301     # Convert relative lengths to LaTeX units
 302     units = {"text%":"\\textwidth", "col%":"\\columnwidth",
 303              "page%":"\\paperwidth", "line%":"\\linewidth",
 304              "theight%":"\\textheight", "pheight%":"\\paperheight"}
 305     for unit in list(units.keys()):
 306         i = slen.find(unit)
 307         if i == -1:
 308             continue
 309         percent = True
 310         minus = slen.rfind("-", 1, i)
 311         plus = slen.rfind("+", 0, i)
 312         latex_unit = units[unit]
 313         if plus == -1 and minus == -1:
 314             value = slen[:i]
 315             value = str(float(value)/100)
 316             end = slen[i + len(unit):]
 317             slen = value + latex_unit + end
 318         if plus > minus:
 319             value = slen[plus + 1:i]
 320             value = str(float(value)/100)
 321             begin = slen[:plus + 1]
 322             end = slen[i+len(unit):]
 323             slen = begin + value + latex_unit + end
 324         if plus < minus:
 325             value = slen[minus + 1:i]
 326             value = str(float(value)/100)
 327             begin = slen[:minus + 1]
 328             slen = begin + value + latex_unit
 329
 330     # replace + and -, but only if the - is not the first character
 331     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 332     # handle the case where "+-1mm" was used, because LaTeX only understands
 333     # "plus 1mm minus 1mm"
 334     if slen.find("plus  minus"):
 335         lastvaluepos = slen.rfind(" ")
 336         lastvalue = slen[lastvaluepos:]
 337         slen = slen.replace("  ", lastvalue + " ")
 338     return (percent, slen)
 339
 340
 341 def length_in_bp(length):
 342     " Convert a length in LyX format to its value in bp units "
 343
 344     em_width = 10.0 / 72.27 # assume 10pt font size
 345     text_width = 8.27 / 1.7 # assume A4 with default margins
 346     # scale factors are taken from Length::inInch()
 347     scales = {"bp"       : 1.0,
 348               "cc"       : (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
 349               "cm"       : (72.0 / 2.54),
 350               "dd"       : (72.0 / (72.27 / (0.376 * 2.845))),
 351               "em"       : (72.0 * em_width),
 352               "ex"       : (72.0 * em_width * 0.4305),
 353               "in"       : 72.0,
 354               "mm"       : (72.0 / 25.4),
 355               "mu"       : (72.0 * em_width / 18.0),
 356               "pc"       : (72.0 / (72.27 / 12.0)),
 357               "pt"       : (72.0 / (72.27)),
 358               "sp"       : (72.0 / (72.27 * 65536.0)),
 359               "text%"    : (72.0 * text_width / 100.0),
 360               "col%"     : (72.0 * text_width / 100.0), # assume 1 column
 361               "page%"    : (72.0 * text_width * 1.7 / 100.0),
 362               "line%"    : (72.0 * text_width / 100.0),
 363               "theight%" : (72.0 * text_width * 1.787 / 100.0),
 364               "pheight%" : (72.0 * text_width * 2.2 / 100.0)}
 365
 366     rx = re.compile(r'^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$')
 367     m = rx.match(length)
 368     if not m:
 369         document.warning("Invalid length value: " + length + ".")
 370         return 0
 371     value = m.group(1)
 372     unit = m.group(2)
 373     if not unit in scales.keys():
 374         document.warning("Unknown length unit: " + unit + ".")
 375         return value
 376     return "%g" % (float(value) * scales[unit])
 377
 378
 379 def revert_flex_inset(lines, name, LaTeXname):
 380   " Convert flex insets to TeX code "
 381   i = 0
 382   while True:
 383     i = find_token(lines, '\\begin_inset Flex ' + name, i)
 384     if i == -1:
 385       return
 386     z = find_end_of_inset(lines, i)
 387     if z == -1:
 388       document.warning("Can't find end of Flex " + name + " inset.")
 389       i += 1
 390       continue
 391     # remove the \end_inset
 392     lines[z - 2:z + 1] = put_cmd_in_ert("}")
 393     # we need to reset character layouts if necessary
 394     j = find_token(lines, '\\emph on', i, z)
 395     k = find_token(lines, '\\noun on', i, z)
 396     l = find_token(lines, '\\series', i, z)
 397     m = find_token(lines, '\\family', i, z)
 398     n = find_token(lines, '\\shape', i, z)
 399     o = find_token(lines, '\\color', i, z)
 400     p = find_token(lines, '\\size', i, z)
 401     q = find_token(lines, '\\bar under', i, z)
 402     r = find_token(lines, '\\uuline on', i, z)
 403     s = find_token(lines, '\\uwave on', i, z)
 404     t = find_token(lines, '\\strikeout on', i, z)
 405     if j != -1:
 406       lines.insert(z - 2, "\\emph default")
 407     if k != -1:
 408       lines.insert(z - 2, "\\noun default")
 409     if l != -1:
 410       lines.insert(z - 2, "\\series default")
 411     if m != -1:
 412       lines.insert(z - 2, "\\family default")
 413     if n != -1:
 414       lines.insert(z - 2, "\\shape default")
 415     if o != -1:
 416       lines.insert(z - 2, "\\color inherit")
 417     if p != -1:
 418       lines.insert(z - 2, "\\size default")
 419     if q != -1:
 420       lines.insert(z - 2, "\\bar default")
 421     if r != -1:
 422       lines.insert(z - 2, "\\uuline default")
 423     if s != -1:
 424       lines.insert(z - 2, "\\uwave default")
 425     if t != -1:
 426       lines.insert(z - 2, "\\strikeout default")
 427     lines[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 428     i += 1
 429
 430
 431 def revert_font_attrs(lines, name, LaTeXname):
 432   " Reverts font changes to TeX code "
 433   i = 0
 434   changed = False
 435   while True:
 436     i = find_token(lines, name + ' on', i)
 437     if i == -1:
 438       return changed
 439     j = find_token(lines, name + ' default', i)
 440     k = find_token(lines, name + ' on', i + 1)
 441     # if there is no default set, the style ends with the layout
 442     # assure hereby that we found the correct layout end
 443     if j != -1 and (j < k or k == -1):
 444       lines[j:j + 1] = put_cmd_in_ert("}")
 445     else:
 446       j = find_token(lines, '\\end_layout', i)
 447       lines[j:j] = put_cmd_in_ert("}")
 448     lines[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 449     changed = True
 450     i += 1
 451
 452
 453 def revert_layout_command(lines, name, LaTeXname):
 454   " Reverts a command from a layout to TeX code "
 455   i = 0
 456   while True:
 457     i = find_token(lines, '\\begin_layout ' + name, i)
 458     if i == -1:
 459       return
 460     k = -1
 461     # find the next layout
 462     j = i + 1
 463     while k == -1:
 464       j = find_token(lines, '\\begin_layout', j)
 465       l = len(lines)
 466       # if nothing was found it was the last layout of the document
 467       if j == -1:
 468         lines[l - 4:l - 4] = put_cmd_in_ert("}")
 469         k = 0
 470       # exclude plain layout because this can be TeX code or another inset
 471       elif lines[j] != '\\begin_layout Plain Layout':
 472         lines[j - 2:j - 2] = put_cmd_in_ert("}")
 473         k = 0
 474       else:
 475         j += 1
 476     lines[i] = '\\begin_layout Standard'
 477     lines[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 478     i += 1
 479
 480
 481 def hex2ratio(s):
 482   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 483   try:
 484     val = int(s, 16)
 485   except:
 486     val = 0
 487   if val != 0:
 488     val += 1
 489   return str(val / 256.0)
 490
 491
 492 def str2bool(s):
 493   "'true' goes to True, case-insensitively, and we strip whitespace."
 494   s = s.strip().lower()
 495   return s == "true"