lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2010 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 " This module offers several free functions to help with lyx2lyx'ing. "
  20
import string

from parser_tools import find_token, find_end_of_inset
from unicode_symbols import unicode_reps
  24
  25
  26 # This will accept either a list of lines or a single line.
  27 # It is bad practice to pass something with embedded newlines,
  28 # though we will handle that.
  29 def add_to_preamble(document, text):
  30     " Add text to the preamble if it is not already there. "
  31
  32     if not type(text) is list:
  33       # split on \n just in case
  34       # it'll give us the one element list we want
  35       # if there's no \n, too
  36       text = text.split('\n')
  37
  38     i = 0
  39     prelen = len(document.preamble)
  40     while True:
  41       i = find_token(document.preamble, text[0], i)
  42       if i == -1:
  43         break
  44       # we need a perfect match
  45       matched = True
  46       for line in text:
  47         if i >= prelen or line != document.preamble[i]:
  48           matched = False
  49           break
  50         i += 1
  51       if matched:
  52         return
  53
  54     document.preamble.extend(text)
  55
  56
  57 # Note that text can be either a list of lines or a single line.
  58 # It should really be a list.
  59 def insert_to_preamble(index, document, text):
  60     """ Insert text to the preamble at a given line"""
  61
  62     if not type(text) is list:
  63       # split on \n just in case
  64       # it'll give us the one element list we want
  65       # if there's no \n, too
  66       text = text.split('\n')
  67
  68     document.preamble[index:index] = text
  69
  70
  71 # This routine wraps some content in an ERT inset.
  72 #
  73 # NOTE: The function accepts either a single string or a LIST of strings as
  74 # argument. But it returns a LIST of strings, split on \n, so that it does
  75 # not have embedded newlines.
  76 #
  77 # This is how lyx2lyx represents a LyX document: as a list of strings,
  78 # each representing a line of a LyX file. Embedded newlines confuse
  79 # lyx2lyx very much.
  80 #
  81 # A call to this routine will often go something like this:
  82 #   i = find_token('\\begin_inset FunkyInset', ...)
  83 #   ...
  84 #   j = find_end_of_inset(document.body, i)
  85 #   content = ...extract content from insets
  86 #   # that could be as simple as:
  87 #   # content = lyx2latex(document, document.body[i:j + 1])
  88 #   ert = put_cmd_in_ert(content)
  89 #   document.body[i:j + 1] = ert
  90 # Now, before we continue, we need to reset i appropriately. Normally,
  91 # this would be:
  92 #   i += len(ert)
  93 # That puts us right after the ERT we just inserted.
  94 #
  95 def put_cmd_in_ert(arg):
  96     ret = ["\\begin_inset ERT", "status collapsed", "\\begin_layout Plain Layout", ""]
  97     # Despite the warnings just given, it will be faster for us to work
  98     # with a single string internally. That way, we only go through the
  99     # unicode_reps loop once.
 100     if type(arg) is list:
 101       s = "\n".join(arg)
 102     else:
 103       s = arg
 104     for rep in unicode_reps:
 105       s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
 106     s = s.replace('\\', "\\backslash\n")
 107     ret += s.splitlines()
 108     ret += ["\\end_layout", "\\end_inset"]
 109     return ret
 110
 111
 112 def lyx2latex(document, lines):
 113     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 114     # clean up multiline stuff
 115     content = ""
 116     ert_end = 0
 117     note_end = 0
 118     hspace = ""
 119
 120     for curline in range(len(lines)):
 121       line = lines[curline]
 122       if line.startswith("\\begin_inset Note Note"):
 123           # We want to skip LyX notes, so remember where the inset ends
 124           note_end = find_end_of_inset(lines, curline + 1)
 125           continue
 126       elif note_end >= curline:
 127           # Skip LyX notes
 128           continue
 129       elif line.startswith("\\begin_inset ERT"):
 130           # We don't want to replace things inside ERT, so figure out
 131           # where the end of the inset is.
 132           ert_end = find_end_of_inset(lines, curline + 1)
 133           continue
 134       elif line.startswith("\\begin_inset Formula"):
 135           line = line[20:]
 136       elif line.startswith("\\begin_inset Quotes"):
 137           # For now, we do a very basic reversion. Someone who understands
 138           # quotes is welcome to fix it up.
 139           qtype = line[20:].strip()
 140           # lang = qtype[0]
 141           side = qtype[1]
 142           dbls = qtype[2]
 143           if side == "l":
 144               if dbls == "d":
 145                   line = "``"
 146               else:
 147                   line = "`"
 148           else:
 149               if dbls == "d":
 150                   line = "''"
 151               else:
 152                   line = "'"
 153       elif line.startswith("\\begin_inset space"):
 154           line = line[18:].strip()
 155           if line.startswith("\\hspace"):
 156               # Account for both \hspace and \hspace*
 157               hspace = line[:-2]
 158               continue
 159           elif line == "\\space{}":
 160               line = "\\ "
 161           elif line == "\\thinspace{}":
 162               line = "\\,"
 163       elif hspace != "":
 164           # The LyX length is in line[8:], after the \length keyword
 165           length = latex_length(line[8:])[1]
 166           line = hspace + "{" + length + "}"
 167           hspace = ""
 168       elif line.isspace() or \
 169             line.startswith("\\begin_layout") or \
 170             line.startswith("\\end_layout") or \
 171             line.startswith("\\begin_inset") or \
 172             line.startswith("\\end_inset") or \
 173             line.startswith("\\lang") or \
 174             line.strip() == "status collapsed" or \
 175             line.strip() == "status open":
 176           #skip all that stuff
 177           continue
 178
 179       # this needs to be added to the preamble because of cases like
 180       # \textmu, \textbackslash, etc.
 181       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 182                                  '\\@ifundefined{textmu}',
 183                                  ' {\\usepackage{textcomp}}{}'])
 184       # a lossless reversion is not possible
 185       # try at least to handle some common insets and settings
 186       if ert_end >= curline:
 187           line = line.replace(r'\backslash', '\\')
 188       else:
 189           # No need to add "{}" after single-nonletter macros
 190           line = line.replace('&', '\\&')
 191           line = line.replace('#', '\\#')
 192           line = line.replace('^', '\\textasciicircum{}')
 193           line = line.replace('%', '\\%')
 194           line = line.replace('_', '\\_')
 195           line = line.replace('$', '\\$')
 196
 197           # Do the LyX text --> LaTeX conversion
 198           for rep in unicode_reps:
 199             line = line.replace(rep[1], rep[0] + "{}")
 200           line = line.replace(r'\backslash', r'\textbackslash{}')
 201           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 202           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 203           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 204           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 205           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 206           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 207           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 208           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 209           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 210       content += line
 211     return content
 212
 213
 214 def latex_length(slen):
 215     '''
 216     Convert lengths to their LaTeX representation. Returns (bool, length),
 217     where the bool tells us if it was a percentage, and the length is the
 218     LaTeX representation.
 219     '''
 220     i = 0
 221     percent = False
 222     # the slen has the form
 223     # ValueUnit+ValueUnit-ValueUnit or
 224     # ValueUnit+-ValueUnit
 225     # the + and - (glue lengths) are optional
 226     # the + always precedes the -
 227
 228     # Convert relative lengths to LaTeX units
 229     units = {"text%":"\\textwidth", "col%":"\\columnwidth",
 230              "page%":"\\paperwidth", "line%":"\\linewidth",
 231              "theight%":"\\textheight", "pheight%":"\\paperheight"}
 232     for unit in units.keys():
 233         i = slen.find(unit)
 234         if i == -1:
 235             continue
 236         percent = True
 237         minus = slen.rfind("-", 1, i)
 238         plus = slen.rfind("+", 0, i)
 239         latex_unit = units[unit]
 240         if plus == -1 and minus == -1:
 241             value = slen[:i]
 242             value = str(float(value)/100)
 243             end = slen[i + len(unit):]
 244             slen = value + latex_unit + end
 245         if plus > minus:
 246             value = slen[plus + 1:i]
 247             value = str(float(value)/100)
 248             begin = slen[:plus + 1]
 249             end = slen[i+len(unit):]
 250             slen = begin + value + latex_unit + end
 251         if plus < minus:
 252             value = slen[minus + 1:i]
 253             value = str(float(value)/100)
 254             begin = slen[:minus + 1]
 255             slen = begin + value + latex_unit
 256
 257     # replace + and -, but only if the - is not the first character
 258     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 259     # handle the case where "+-1mm" was used, because LaTeX only understands
 260     # "plus 1mm minus 1mm"
 261     if slen.find("plus  minus"):
 262         lastvaluepos = slen.rfind(" ")
 263         lastvalue = slen[lastvaluepos:]
 264         slen = slen.replace("  ", lastvalue + " ")
 265     return (percent, slen)
 266
 267
 268 def revert_flex_inset(document, name, LaTeXname, position):
 269   " Convert flex insets to TeX code "
 270   i = position
 271   while True:
 272     i = find_token(document.body, '\\begin_inset Flex ' + name, i)
 273     if i == -1:
 274       return
 275     z = find_end_of_inset(document.body, i)
 276     if z == -1:
 277       document.warning("Malformed LyX document: Can't find end of Flex " + name + " inset.")
 278       return
 279     # remove the \end_inset
 280     document.body[z - 2:z + 1] = put_cmd_in_ert("}")
 281     # we need to reset character layouts if necessary
 282     j = find_token(document.body, '\\emph on', i, z)
 283     k = find_token(document.body, '\\noun on', i, z)
 284     l = find_token(document.body, '\\series', i, z)
 285     m = find_token(document.body, '\\family', i, z)
 286     n = find_token(document.body, '\\shape', i, z)
 287     o = find_token(document.body, '\\color', i, z)
 288     p = find_token(document.body, '\\size', i, z)
 289     q = find_token(document.body, '\\bar under', i, z)
 290     r = find_token(document.body, '\\uuline on', i, z)
 291     s = find_token(document.body, '\\uwave on', i, z)
 292     t = find_token(document.body, '\\strikeout on', i, z)
 293     if j != -1:
 294       document.body.insert(z - 2, "\\emph default")
 295     if k != -1:
 296       document.body.insert(z - 2, "\\noun default")
 297     if l != -1:
 298       document.body.insert(z - 2, "\\series default")
 299     if m != -1:
 300       document.body.insert(z - 2, "\\family default")
 301     if n != -1:
 302       document.body.insert(z - 2, "\\shape default")
 303     if o != -1:
 304       document.body.insert(z - 2, "\\color inherit")
 305     if p != -1:
 306       document.body.insert(z - 2, "\\size default")
 307     if q != -1:
 308       document.body.insert(z - 2, "\\bar default")
 309     if r != -1:
 310       document.body.insert(z - 2, "\\uuline default")
 311     if s != -1:
 312       document.body.insert(z - 2, "\\uwave default")
 313     if t != -1:
 314       document.body.insert(z - 2, "\\strikeout default")
 315     document.body[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
 316     i += 1
 317
 318
 319 def revert_font_attrs(document, name, LaTeXname):
 320   " Reverts font changes to TeX code "
 321   i = 0
 322   changed = False
 323   while True:
 324     i = find_token(document.body, name + ' on', i)
 325     if i == -1:
 326       return changed
 327     j = find_token(document.body, name + ' default', i)
 328     k = find_token(document.body, name + ' on', i + 1)
 329     # if there is no default set, the style ends with the layout
 330     # assure hereby that we found the correct layout end
 331     if j != -1 and (j < k or k == -1):
 332       document.body[j:j + 1] = put_cmd_in_ert("}")
 333     else:
 334       j = find_token(document.body, '\\end_layout', i)
 335       document.body[j:j] = put_cmd_in_ert("}")
 336     document.body[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 337     changed = True
 338     i += 1
 339
 340
 341 def revert_layout_command(document, name, LaTeXname, position):
 342   " Reverts a command from a layout to TeX code "
 343   i = position
 344   while True:
 345     i = find_token(document.body, '\\begin_layout ' + name, i)
 346     if i == -1:
 347       return
 348     k = -1
 349     # find the next layout
 350     j = i + 1
 351     while k == -1:
 352       j = find_token(document.body, '\\begin_layout', j)
 353       l = len(document.body)
 354       # if nothing was found it was the last layout of the document
 355       if j == -1:
 356         document.body[l - 4:l - 4] = put_cmd_in_ert("}")
 357         k = 0
 358       # exclude plain layout because this can be TeX code or another inset
 359       elif document.body[j] != '\\begin_layout Plain Layout':
 360         document.body[j - 2:j - 2] = put_cmd_in_ert("}")
 361         k = 0
 362       else:
 363         j += 1
 364     document.body[i] = '\\begin_layout Standard'
 365     document.body[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
 366     i += 1
 367
 368
 369 def hex2ratio(s):
 370   " Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0] "
 371   try:
 372     val = int(s, 16)
 373   except:
 374     val = 0
 375   if val != 0:
 376     val += 1
 377   return str(val / 256.0)
 378
 379
 380 def str2bool(s):
 381   "'true' goes to True, case-insensitively, and we strip whitespace."
 382   s = s.strip().lower()
 383   return s == "true"