lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 '''
  20 This module offers several free functions to help with lyx2lyx'ing.
More documentation is below, but here is a quick guide to what
  22 they do. Optional arguments are marked by brackets.
  23
  24 add_to_preamble(document, text):
  25   Here, text can be either a single line or a list of lines. It
  26   is bad practice to pass something with embedded newlines, but
  27   we will handle that properly.
  28   The routine checks to see whether the provided material is
  29   already in the preamble. If not, it adds it.
  30   Prepends a comment "% Added by lyx2lyx" to text.
  31
  32 insert_to_preamble(document, text[, index]):
  33   Here, text can be either a single line or a list of lines. It
  34   is bad practice to pass something with embedded newlines, but
  35   we will handle that properly.
  36   The routine inserts text at document.preamble[index], where by
  37   default index is 0, so the material is inserted at the beginning.
  38   Prepends a comment "% Added by lyx2lyx" to text.
  39
  40 put_cmd_in_ert(arg):
  41   Here arg should be a list of strings (lines), which we want to
  42   wrap in ERT. Returns a list of strings so wrapped.
  43   A call to this routine will often go something like this:
  44     i = find_token('\\begin_inset FunkyInset', ...)
  45     j = find_end_of_inset(document.body, i)
    content = lyx2latex(document, document.body[i:j + 1])
  47     ert = put_cmd_in_ert(content)
  48     document.body[i:j+1] = ert
  49
  50 lyx2latex(document, lines):
  51   Here, lines is a list of lines of LyX material we want to convert
  52   to LaTeX. We do the best we can and return a string containing
  53   the translated material.
  54
  55 latex_length(slen):
  56     Convert lengths (in LyX form) to their LaTeX representation. Returns
  57     (bool, length), where the bool tells us if it was a percentage, and
  58     the length is the LaTeX representation.
  59
  60 '''
  61
  62 import string
  63 from parser_tools import find_token, find_end_of_inset
  64 from unicode_symbols import unicode_reps
  65
  66
  67 # This will accept either a list of lines or a single line.
  68 # It is bad practice to pass something with embedded newlines,
  69 # though we will handle that.
  70 def add_to_preamble(document, text):
  71     " Add text to the preamble if it is not already there. "
  72
  73     if not type(text) is list:
  74       # split on \n just in case
  75       # it'll give us the one element list we want
  76       # if there's no \n, too
  77       text = text.split('\n')
  78
  79     i = 0
  80     prelen = len(document.preamble)
  81     while True:
  82       i = find_token(document.preamble, text[0], i)
  83       if i == -1:
  84         break
  85       # we need a perfect match
  86       matched = True
  87       for line in text:
  88         if i >= prelen or line != document.preamble[i]:
  89           matched = False
  90           break
  91         i += 1
  92       if matched:
  93         return
  94
  95     document.preamble.extend(["% Added by lyx2lyx"])
  96     document.preamble.extend(text)
  97
  98
  99 # Note that text can be either a list of lines or a single line.
 100 # It should really be a list.
 101 def insert_to_preamble(document, text, index = 0):
 102     """ Insert text to the preamble at a given line"""
 103
 104     if not type(text) is list:
 105       # split on \n just in case
 106       # it'll give us the one element list we want
 107       # if there's no \n, too
 108       text = text.split('\n')
 109
 110     text.insert(0, "% Added by lyx2lyx")
 111     document.preamble[index:index] = text
 112
 113
 114 def put_cmd_in_ert(arg):
 115     '''
 116     arg should be a list of lines we want to wrap in ERT.
 117     Returns a list of strings, with the lines so wrapped.
 118     '''
 119
 120     ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
 121     # It will be faster for us to work with a single string internally.
 122     # That way, we only go through the unicode_reps loop once.
 123     if type(arg) is list:
 124       s = "\n".join(arg)
 125     else:
 126       s = arg
 127     for rep in unicode_reps:
 128       s = s.replace(rep[1], rep[0])
 129     s = s.replace('\\', "\\backslash\n")
 130     ret += s.splitlines()
 131     ret += ["\\end_layout", "", "\\end_inset"]
 132     return ret
 133
 134
 135 def get_ert(lines, i):
 136     'Convert an ERT inset into LaTeX.'
 137     if not lines[i].startswith("\\begin_inset ERT"):
 138         return ""
 139     j = find_end_of_inset(lines, i)
 140     if j == -1:
 141         return ""
 142     while i < j and not lines[i].startswith("status"):
 143         i = i + 1
 144     i = i + 1
 145     ret = ""
 146     first = True
 147     while i < j:
 148         if lines[i] == "\\begin_layout Plain Layout":
 149             if first:
 150                 first = False
 151             else:
 152                 ret = ret + "\n"
 153             while i + 1 < j and lines[i+1] == "":
 154                 i = i + 1
 155         elif lines[i] == "\\end_layout":
 156             while i + 1 < j and lines[i+1] == "":
 157                 i = i + 1
 158         elif lines[i] == "\\backslash":
 159             ret = ret + "\\"
 160         else:
 161             ret = ret + lines[i]
 162         i = i + 1
 163     return ret
 164
 165
 166 def lyx2latex(document, lines):
 167     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 168
 169     content = ""
 170     ert_end = 0
 171     note_end = 0
 172     hspace = ""
 173
 174     for curline in range(len(lines)):
 175       line = lines[curline]
 176       if line.startswith("\\begin_inset Note Note"):
 177           # We want to skip LyX notes, so remember where the inset ends
 178           note_end = find_end_of_inset(lines, curline + 1)
 179           continue
 180       elif note_end >= curline:
 181           # Skip LyX notes
 182           continue
 183       elif line.startswith("\\begin_inset ERT"):
 184           # We don't want to replace things inside ERT, so figure out
 185           # where the end of the inset is.
 186           ert_end = find_end_of_inset(lines, curline + 1)
 187           continue
 188       elif line.startswith("\\begin_inset Formula"):
 189           line = line[20:]
 190       elif line.startswith("\\begin_inset Quotes"):
 191           # For now, we do a very basic reversion. Someone who understands
 192           # quotes is welcome to fix it up.
 193           qtype = line[20:].strip()
 194           # lang = qtype[0]
 195           side = qtype[1]
 196           dbls = qtype[2]
 197           if side == "l":
 198               if dbls == "d":
 199                   line = "``"
 200               else:
 201                   line = "`"
 202           else:
 203               if dbls == "d":
 204                   line = "''"
 205               else:
 206                   line = "'"
 207       elif line.startswith("\\begin_inset Newline newline"):
 208           line = "\\\\ "
 209       elif line.startswith("\\noindent"):
 210           line = "\\noindent " # we need the space behind the command
 211       elif line.startswith("\\begin_inset space"):
 212           line = line[18:].strip()
 213           if line.startswith("\\hspace"):
 214               # Account for both \hspace and \hspace*
 215               hspace = line[:-2]
 216               continue
 217           elif line == "\\space{}":
 218               line = "\\ "
 219           elif line == "\\thinspace{}":
 220               line = "\\,"
 221       elif hspace != "":
 222           # The LyX length is in line[8:], after the \length keyword
 223           length = latex_length(line[8:])[1]
 224           line = hspace + "{" + length + "}"
 225           hspace = ""
 226       elif line.isspace() or \
 227             line.startswith("\\begin_layout") or \
 228             line.startswith("\\end_layout") or \
 229             line.startswith("\\begin_inset") or \
 230             line.startswith("\\end_inset") or \
 231             line.startswith("\\lang") or \
 232             line.strip() == "status collapsed" or \
 233             line.strip() == "status open":
 234           #skip all that stuff
 235           continue
 236
 237       # this needs to be added to the preamble because of cases like
 238       # \textmu, \textbackslash, etc.
 239       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 240                                  '\\@ifundefined{textmu}',
 241                                  ' {\\usepackage{textcomp}}{}'])
 242       # a lossless reversion is not possible
 243       # try at least to handle some common insets and settings
 244       if ert_end >= curline:
 245           line = line.replace(r'\backslash', '\\')
 246       else:
 247           # No need to add "{}" after single-nonletter macros
 248           line = line.replace('&', '\\&')
 249           line = line.replace('#', '\\#')
 250           line = line.replace('^', '\\textasciicircum{}')
 251           line = line.replace('%', '\\%')
 252           line = line.replace('_', '\\_')
 253           line = line.replace('$', '\\$')
 254
 255           # Do the LyX text --> LaTeX conversion
 256           for rep in unicode_reps:
 257               line = line.replace(rep[1], rep[0])
 258           line = line.replace(r'\backslash', r'\textbackslash{}')
 259           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 260           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 261           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 262           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 263           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 264           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 265           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 266           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 267           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 268       content += line
 269     return content
 270
 271
 272 def latex_length(slen):
 273     '''
 274     Convert lengths to their LaTeX representation. Returns (bool, length),
 275     where the bool tells us if it was a percentage, and the length is the
 276     LaTeX representation.
 277     '''
 278     i = 0
 279     percent = False
 280     # the slen has the form
 281     # ValueUnit+ValueUnit-ValueUnit or
 282     # ValueUnit+-ValueUnit
 283     # the + and - (glue lengths) are optional
 284     # the + always precedes the -
 285
 286     # Convert relative lengths to LaTeX units
 287     units = {"text%":"\\textwidth", "col%":"\\columnwidth",
 288              "page%":"\\paperwidth", "line%":"\\linewidth",
 289              "theight%":"\\textheight", "pheight%":"\\paperheight"}
 290     for unit in list(units.keys()):
 291         i = slen.find(unit)
 292         if i == -1:
 293             continue
 294         percent = True
 295         minus = slen.rfind("-", 1, i)
 296         plus = slen.rfind("+", 0, i)
 297         latex_unit = units[unit]
 298         if plus == -1 and minus == -1:
 299             value = slen[:i]
 300             value = str(float(value)/100)
 301             end = slen[i + len(unit):]
 302             slen = value + latex_unit + end
 303         if plus > minus:
 304             value = slen[plus + 1:i]
 305             value = str(float(value)/100)
 306             begin = slen[:plus + 1]
 307             end = slen[i+len(unit):]
 308             slen = begin + value + latex_unit + end
 309         if plus < minus:
 310             value = slen[minus + 1:i]
 311             value = str(float(value)/100)
 312             begin = slen[:minus + 1]
 313             slen = begin + value + latex_unit
 314
 315     # replace + and -, but only if the - is not the first character
 316     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 317     # handle the case where "+-1mm" was used, because LaTeX only understands
 318     # "plus 1mm minus 1mm"
 319     if slen.find("plus  minus"):
 320         lastvaluepos = slen.rfind(" ")
 321         lastvalue = slen[lastvaluepos:]
 322         slen = slen.replace("  ", lastvalue + " ")
 323     return (percent, slen)
 324
 325
 326 def revert_flex_inset(lines, name, LaTeXname):
 327   " Convert flex insets to TeX code "
 328   i = 0
 329   while True:
 330     i = find_token(lines, '\\begin_inset Flex ' + name, i)
 331     if i == -1:
 332       return
 333     z = find_end_of_inset(lines, i)
 334     if z == -1:
 335       document.warning("Can't find end of Flex " + name + " inset.")
 336       i += 1
 337       continue
 338     # remove the \end_inset
 339     lines[z - 2:z + 1] = put_cmd_in_ert("}")
 340     # we need to reset character layouts if necessary
 341     j = find_token(lines, '\\emph on', i, z)
 342     k = find_token(lines, '\\noun on', i, z)
 343     l = find_token(lines, '\\series', i, z)
 344     m = find_token(lines, '\\family', i, z)
 345     n = find_token(lines, '\\shape', i, z)
 346     o = find_token(lines, '\\color', i, z)
 347     p = find_token(lines, '\\size', i, z)
 348     q = find_token(lines, '\\bar under', i, z)
 349     r = find_token(lines, '\\uuline on', i, z)
 350     s = find_token(lines, '\\uwave on', i, z)
 351     t = find_token(lines, '\\strikeout on', i, z)
 352     if j != -1:
 353       lines.insert(z - 2, "\\emph default")
 354     if k != -1:
 355       lines.insert(z - 2, "\\noun default")
 356     if l != -1:
 357       lines.insert(z - 2, "\\series default")
 358     if m != -1:
 359       lines.insert(z - 2, "\\family default")
 360     if n != -1:
 361       lines.insert(z - 2, "\\shape default")
 362     if o != -1:
 363       lines.insert(z - 2, "\\color inherit")
 364     if p != -1:
 365       lines.insert(z - 2, "\\size default")
 366     if q != -1:
 367       lines.insert(z - 2, "\\bar default")
 368     if r != -1:
 369       lines.insert(z - 2, "\\uuline default")
 370     if s != -1:
 371       lines.insert(z - 2, "\\uwave default")
 372     if t != -1:
 373       lines.insert(z - 2, "\\strikeout default")
 374     lines[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 375     i += 1
 376
 377
 378 def revert_font_attrs(lines, name, LaTeXname):
 379   " Reverts font changes to TeX code "
 380   i = 0
 381   changed = False
 382   while True:
 383     i = find_token(lines, name + ' on', i)
 384     if i == -1:
 385       return changed
 386     j = find_token(lines, name + ' default', i)
 387     k = find_token(lines, name + ' on', i + 1)
 388     # if there is no default set, the style ends with the layout
 389     # assure hereby that we found the correct layout end
 390     if j != -1 and (j < k or k == -1):
 391       lines[j:j + 1] = put_cmd_in_ert("}")
 392     else:
 393       j = find_token(lines, '\\end_layout', i)
 394       lines[j:j] = put_cmd_in_ert("}")
 395     lines[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 396     changed = True
 397     i += 1
 398
 399
 400 def revert_layout_command(lines, name, LaTeXname):
 401   " Reverts a command from a layout to TeX code "
 402   i = 0
 403   while True:
 404     i = find_token(lines, '\\begin_layout ' + name, i)
 405     if i == -1:
 406       return
 407     k = -1
 408     # find the next layout
 409     j = i + 1
 410     while k == -1:
 411       j = find_token(lines, '\\begin_layout', j)
 412       l = len(lines)
 413       # if nothing was found it was the last layout of the document
 414       if j == -1:
 415         lines[l - 4:l - 4] = put_cmd_in_ert("}")
 416         k = 0
 417       # exclude plain layout because this can be TeX code or another inset
 418       elif lines[j] != '\\begin_layout Plain Layout':
 419         lines[j - 2:j - 2] = put_cmd_in_ert("}")
 420         k = 0
 421       else:
 422         j += 1
 423     lines[i] = '\\begin_layout Standard'
 424     lines[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 425     i += 1
 426
 427
 428 def hex2ratio(s):
 429   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 430   try:
 431     val = int(s, 16)
 432   except:
 433     val = 0
 434   if val != 0:
 435     val += 1
 436   return str(val / 256.0)
 437
 438
 439 def str2bool(s):
 440   "'true' goes to True, case-insensitively, and we strip whitespace."
 441   s = s.strip().lower()
 442   return s == "true"