lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     if document.cjk_encoding != '':
 250         return
 251     encoding_stack = [document.encoding]
 252     lang_re = re.compile(r"^\\lang\s(\S+)")
 253     if document.inputencoding == "auto" or document.inputencoding == "default":
 254         for i in range(len(document.body)):
 255             result = lang_re.match(document.body[i])
 256             if result:
 257                 language = result.group(1)
 258                 if language == "default":
 259                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 260                     encoding_stack[-1] = document.encoding
 261                 else:
 262                     from lyx2lyx_lang import lang
 263                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 264                     encoding_stack[-1] = lang[language][3]
 265             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 266                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 267                 encoding_stack.append(encoding_stack[-1])
 268             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 269                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 270                 if len(encoding_stack) == 1:
 271                     # Don't remove the document encoding from the stack
 272                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 273                 else:
 274                     del encoding_stack[-1]
 275             if encoding_stack[-1] != document.encoding:
 276                 if forward:
 277                     # This line has been incorrectly interpreted as if it was
 278                     # encoded in 'encoding'.
 279                     # Convert back to the 8bit string that was in the file.
 280                     orig = document.body[i].encode(document.encoding)
 281                     # Convert the 8bit string that was in the file to unicode
 282                     # with the correct encoding.
 283                     document.body[i] = orig.decode(encoding_stack[-1])
 284                 else:
 285                     # Convert unicode to the 8bit string that will be written
 286                     # to the file with the correct encoding.
 287                     orig = document.body[i].encode(encoding_stack[-1])
 288                     # Convert the 8bit string that will be written to the
 289                     # file to fake unicode with the encoding that will later
 290                     # be used when writing to the file.
 291                     document.body[i] = orig.decode(document.encoding)
 292
 293
 294 def convert_utf8(document):
 295     " Set document encoding to UTF-8. "
 296     convert_multiencoding(document, True)
 297     document.encoding = "utf8"
 298
 299
 300 def revert_utf8(document):
 301     " Set document encoding to the value corresponding to inputencoding. "
 302     i = find_token(document.header, "\\inputencoding", 0)
 303     if i == -1:
 304         document.header.append("\\inputencoding auto")
 305     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 306         document.header[i] = "\\inputencoding auto"
 307     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 308     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 309     convert_multiencoding(document, False)
 310
 311
 312 def revert_cs_label(document):
 313     " Remove status flag of charstyle label. "
 314     i = 0
 315     while 1:
 316         i = find_token(document.body, "\\begin_inset CharStyle", i)
 317         if i == -1:
 318             return
 319         # Seach for a line starting 'show_label'
 320         # If it is not there, break with a warning message
 321         i = i + 1
 322         while 1:
 323             if (document.body[i][:10] == "show_label"):
 324                 del document.body[i]
 325                 break
 326             elif (document.body[i][:13] == "\\begin_layout"):
 327                 document.warning("Malformed LyX document: Missing 'show_label'.")
 328                 break
 329             i = i + 1
 330
 331         i = i + 1
 332
 333
 334 def convert_bibitem(document):
 335     """ Convert
 336 \bibitem [option]{argument}
 337
 338 to
 339
 340 \begin_inset LatexCommand bibitem
 341 label "option"
 342 key "argument"
 343
 344 \end_inset
 345
 346 This must be called after convert_commandparams.
 347 """
 348     i = 0
 349     while 1:
 350         i = find_token(document.body, "\\bibitem", i)
 351         if i == -1:
 352             break
 353         j = document.body[i].find('[') + 1
 354         k = document.body[i].rfind(']')
 355         if j == 0: # No optional argument found
 356             option = None
 357         else:
 358             option = document.body[i][j:k]
 359         j = document.body[i].rfind('{') + 1
 360         k = document.body[i].rfind('}')
 361         argument = document.body[i][j:k]
 362         lines = ['\\begin_inset LatexCommand bibitem']
 363         if option != None:
 364             lines.append('label "%s"' % option.replace('"', '\\"'))
 365         lines.append('key "%s"' % argument.replace('"', '\\"'))
 366         lines.append('')
 367         lines.append('\\end_inset')
 368         document.body[i:i+1] = lines
 369         i = i + 1
 370
 371
 372 commandparams_info = {
 373     # command : [option1, option2, argument]
 374     "bibitem" : ["label", "", "key"],
 375     "bibtex" : ["options", "btprint", "bibfiles"],
 376     "cite"        : ["after", "before", "key"],
 377     "citet"       : ["after", "before", "key"],
 378     "citep"       : ["after", "before", "key"],
 379     "citealt"     : ["after", "before", "key"],
 380     "citealp"     : ["after", "before", "key"],
 381     "citeauthor"  : ["after", "before", "key"],
 382     "citeyear"    : ["after", "before", "key"],
 383     "citeyearpar" : ["after", "before", "key"],
 384     "citet*"      : ["after", "before", "key"],
 385     "citep*"      : ["after", "before", "key"],
 386     "citealt*"    : ["after", "before", "key"],
 387     "citealp*"    : ["after", "before", "key"],
 388     "citeauthor*" : ["after", "before", "key"],
 389     "Citet"       : ["after", "before", "key"],
 390     "Citep"       : ["after", "before", "key"],
 391     "Citealt"     : ["after", "before", "key"],
 392     "Citealp"     : ["after", "before", "key"],
 393     "Citeauthor"  : ["after", "before", "key"],
 394     "Citet*"      : ["after", "before", "key"],
 395     "Citep*"      : ["after", "before", "key"],
 396     "Citealt*"    : ["after", "before", "key"],
 397     "Citealp*"    : ["after", "before", "key"],
 398     "Citeauthor*" : ["after", "before", "key"],
 399     "citefield"   : ["after", "before", "key"],
 400     "citetitle"   : ["after", "before", "key"],
 401     "cite*"       : ["after", "before", "key"],
 402     "hfill" : ["", "", ""],
 403     "index"      : ["", "", "name"],
 404     "printindex" : ["", "", "name"],
 405     "label" : ["", "", "name"],
 406     "eqref"     : ["name", "", "reference"],
 407     "pageref"   : ["name", "", "reference"],
 408     "prettyref" : ["name", "", "reference"],
 409     "ref"       : ["name", "", "reference"],
 410     "vpageref"  : ["name", "", "reference"],
 411     "vref"      : ["name", "", "reference"],
 412     "tableofcontents" : ["", "", "type"],
 413     "htmlurl" : ["name", "", "target"],
 414     "url"     : ["name", "", "target"]}
 415
 416
 417 def convert_commandparams(document):
 418     """ Convert
 419
 420  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 421  \end_inset
 422
 423  to
 424
 425  \begin_inset LatexCommand cmdname
 426  name1 "opt1"
 427  name2 "opt2"
 428  name3 "arg"
 429  \end_inset
 430
 431  name1, name2 and name3 can be different for each command.
 432 """
 433     # \begin_inset LatexCommand bibitem was not the official version (see
 434     # convert_bibitem()), but could be read in, so we convert it here, too.
 435
 436     i = 0
 437     while 1:
 438         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 439         if i == -1:
 440             break
 441         command = document.body[i][26:].strip()
 442         if command == "":
 443             document.warning("Malformed LyX document: Missing LatexCommand name.")
 444             i = i + 1
 445             continue
 446
 447         j = find_token(document.body, "\\end_inset", i + 1)
 448         if j == -1:
 449             document.warning("Malformed document")
 450         else:
 451             command += "".join(document.body[i+1:j])
 452             document.body[i+1:j] = []
 453
 454         # The following parser is taken from the original InsetCommandParams::scanCommand
 455         name = ""
 456         option1 = ""
 457         option2 = ""
 458         argument = ""
 459         state = "WS"
 460         # Used to handle things like \command[foo[bar]]{foo{bar}}
 461         nestdepth = 0
 462         b = 0
 463         for c in command:
 464             if ((state == "CMDNAME" and c == ' ') or
 465                 (state == "CMDNAME" and c == '[') or
 466                 (state == "CMDNAME" and c == '{')):
 467                 state = "WS"
 468             if ((state == "OPTION" and c == ']') or
 469                 (state == "SECOPTION" and c == ']') or
 470                 (state == "CONTENT" and c == '}')):
 471                 if nestdepth == 0:
 472                     state = "WS"
 473                 else:
 474                     nestdepth = nestdepth - 1
 475             if ((state == "OPTION" and c == '[') or
 476                 (state == "SECOPTION" and c == '[') or
 477                 (state == "CONTENT" and c == '{')):
 478                 nestdepth = nestdepth + 1
 479             if state == "CMDNAME":
 480                     name += c
 481             elif state == "OPTION":
 482                     option1 += c
 483             elif state == "SECOPTION":
 484                     option2 += c
 485             elif state == "CONTENT":
 486                     argument += c
 487             elif state == "WS":
 488                 if c == '\\':
 489                     state = "CMDNAME"
 490                 elif c == '[' and b != ']':
 491                     state = "OPTION"
 492                     nestdepth = 0 # Just to be sure
 493                 elif c == '[' and b == ']':
 494                     state = "SECOPTION"
 495                     nestdepth = 0 # Just to be sure
 496                 elif c == '{':
 497                     state = "CONTENT"
 498                     nestdepth = 0 # Just to be sure
 499             b = c
 500
 501         # Now we have parsed the command, output the parameters
 502         lines = ["\\begin_inset LatexCommand %s" % name]
 503         if option1 != "":
 504             if commandparams_info[name][0] == "":
 505                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 506             else:
 507                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 508         if option2 != "":
 509             if commandparams_info[name][1] == "":
 510                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 511             else:
 512                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 513         if argument != "":
 514             if commandparams_info[name][2] == "":
 515                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 516             else:
 517                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 518         document.body[i:i+1] = lines
 519         i = i + 1
 520
 521
 522 def revert_commandparams(document):
 523     regex = re.compile(r'(\S+)\s+(.+)')
 524     i = 0
 525     while 1:
 526         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 527         if i == -1:
 528             break
 529         name = document.body[i].split()[2]
 530         j = find_end_of_inset(document.body, i + 1)
 531         preview_line = ""
 532         option1 = ""
 533         option2 = ""
 534         argument = ""
 535         for k in range(i + 1, j):
 536             match = re.match(regex, document.body[k])
 537             if match:
 538                 pname = match.group(1)
 539                 pvalue = match.group(2)
 540                 if pname == "preview":
 541                     preview_line = document.body[k]
 542                 elif (commandparams_info[name][0] != "" and
 543                       pname == commandparams_info[name][0]):
 544                     option1 = pvalue.strip('"').replace('\\"', '"')
 545                 elif (commandparams_info[name][1] != "" and
 546                       pname == commandparams_info[name][1]):
 547                     option2 = pvalue.strip('"').replace('\\"', '"')
 548                 elif (commandparams_info[name][2] != "" and
 549                       pname == commandparams_info[name][2]):
 550                     argument = pvalue.strip('"').replace('\\"', '"')
 551             elif document.body[k].strip() != "":
 552                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 553         if name == "bibitem":
 554             if option1 == "":
 555                 lines = ["\\bibitem {%s}" % argument]
 556             else:
 557                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 558         else:
 559             if option1 == "":
 560                 if option2 == "":
 561                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 562                 else:
 563                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 564             else:
 565                 if option2 == "":
 566                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 567                 else:
 568                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 569         if name != "bibitem":
 570             if preview_line != "":
 571                 lines.append(preview_line)
 572             lines.append('')
 573             lines.append('\\end_inset')
 574         document.body[i:j+1] = lines
 575         i = j + 1
 576
 577
 578 def revert_nomenclature(document):
 579     " Convert nomenclature entry to ERT. "
 580     regex = re.compile(r'(\S+)\s+(.+)')
 581     i = 0
 582     use_nomencl = 0
 583     while 1:
 584         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 585         if i == -1:
 586             break
 587         use_nomencl = 1
 588         j = find_end_of_inset(document.body, i + 1)
 589         preview_line = ""
 590         symbol = ""
 591         description = ""
 592         prefix = ""
 593         for k in range(i + 1, j):
 594             match = re.match(regex, document.body[k])
 595             if match:
 596                 name = match.group(1)
 597                 value = match.group(2)
 598                 if name == "preview":
 599                     preview_line = document.body[k]
 600                 elif name == "symbol":
 601                     symbol = value.strip('"').replace('\\"', '"')
 602                 elif name == "description":
 603                     description = value.strip('"').replace('\\"', '"')
 604                 elif name == "prefix":
 605                     prefix = value.strip('"').replace('\\"', '"')
 606             elif document.body[k].strip() != "":
 607                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 608         if prefix == "":
 609             command = 'nomenclature{%s}{%s}' % (symbol, description)
 610         else:
 611             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 612         document.body[i:j+1] = ['\\begin_inset ERT',
 613                                 'status collapsed',
 614                                 '',
 615                                 '\\begin_layout %s' % document.default_layout,
 616                                 '',
 617                                 '',
 618                                 '\\backslash',
 619                                 command,
 620                                 '\\end_layout',
 621                                 '',
 622                                 '\\end_inset']
 623         i = i + 11
 624     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 625         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 626         document.preamble.append('\\makenomenclature')
 627
 628
 629 def revert_printnomenclature(document):
 630     " Convert printnomenclature to ERT. "
 631     regex = re.compile(r'(\S+)\s+(.+)')
 632     i = 0
 633     use_nomencl = 0
 634     while 1:
 635         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 636         if i == -1:
 637             break
 638         use_nomencl = 1
 639         j = find_end_of_inset(document.body, i + 1)
 640         preview_line = ""
 641         labelwidth = ""
 642         for k in range(i + 1, j):
 643             match = re.match(regex, document.body[k])
 644             if match:
 645                 name = match.group(1)
 646                 value = match.group(2)
 647                 if name == "preview":
 648                     preview_line = document.body[k]
 649                 elif name == "labelwidth":
 650                     labelwidth = value.strip('"').replace('\\"', '"')
 651             elif document.body[k].strip() != "":
 652                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 653         if labelwidth == "":
 654             command = 'nomenclature{}'
 655         else:
 656             command = 'nomenclature[%s]' % labelwidth
 657         document.body[i:j+1] = ['\\begin_inset ERT',
 658                                 'status collapsed',
 659                                 '',
 660                                 '\\begin_layout %s' % document.default_layout,
 661                                 '',
 662                                 '',
 663                                 '\\backslash',
 664                                 command,
 665                                 '\\end_layout',
 666                                 '',
 667                                 '\\end_inset']
 668         i = i + 11
 669     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 670         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 671         document.preamble.append('\\makenomenclature')
 672
 673
 674 def convert_esint(document):
 675     " Add \\use_esint setting to header. "
 676     i = find_token(document.header, "\\cite_engine", 0)
 677     if i == -1:
 678         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 679         return
 680     # 0 is off, 1 is auto, 2 is on.
 681     document.header.insert(i, '\\use_esint 0')
 682
 683
 684 def revert_esint(document):
 685     " Remove \\use_esint setting from header. "
 686     i = find_token(document.header, "\\use_esint", 0)
 687     if i == -1:
 688         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 689         return
 690     use_esint = document.header[i].split()[1]
 691     del document.header[i]
 692     # 0 is off, 1 is auto, 2 is on.
 693     if (use_esint == 2):
 694         document.preamble.append('\\usepackage{esint}')
 695
 696
 697 def revert_clearpage(document):
 698     " clearpage -> ERT "
 699     i = 0
 700     while 1:
 701         i = find_token(document.body, "\\clearpage", i)
 702         if i == -1:
 703             break
 704         document.body[i:i+1] =  ['\\begin_inset ERT',
 705                                 'status collapsed',
 706                                 '',
 707                                 '\\begin_layout %s' % document.default_layout,
 708                                 '',
 709                                 '',
 710                                 '\\backslash',
 711                                 'clearpage',
 712                                 '\\end_layout',
 713                                 '',
 714                                 '\\end_inset']
 715     i = i + 1
 716
 717
 718 def revert_cleardoublepage(document):
 719     " cleardoublepage -> ERT "
 720     i = 0
 721     while 1:
 722         i = find_token(document.body, "\\cleardoublepage", i)
 723         if i == -1:
 724             break
 725         document.body[i:i+1] =  ['\\begin_inset ERT',
 726                                 'status collapsed',
 727                                 '',
 728                                 '\\begin_layout %s' % document.default_layout,
 729                                 '',
 730                                 '',
 731                                 '\\backslash',
 732                                 'cleardoublepage',
 733                                 '\\end_layout',
 734                                 '',
 735                                 '\\end_inset']
 736     i = i + 1
 737
 738
 739 def convert_lyxline(document):
 740     " remove fontsize commands for \lyxline "
 741     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 742     # to change the line thickness. The new definiton does this so that imported
 743     # \lyxlines would have a different line thickness. The eventual fontsize command
 744     # before \lyxline is therefore removed to get the same output.
 745     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 746                  "large", "Large", "LARGE", "huge", "Huge"]
 747     for n in range(0, len(fontsizes)):
 748         i = 0
 749         k = 0
 750         while i < len(document.body):
 751             i = find_token(document.body, "\\size " + fontsizes[n], i)
 752             k = find_token(document.body, "\\lyxline", i)
 753             # the corresponding fontsize command is always 2 lines before the \lyxline
 754             if (i != -1 and k == i+2):
 755                 document.body[i:i+1] = []
 756             else:
 757                 break
 758         i = i + 1
 759
 760
 761 def revert_encodings(document):
 762     " Set new encodings to auto. "
 763     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 764                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 765                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 766     i = find_token(document.header, "\\inputencoding", 0)
 767     if i == -1:
 768         document.header.append("\\inputencoding auto")
 769     else:
 770         inputenc = get_value(document.header, "\\inputencoding", i)
 771         if inputenc in encodings:
 772             document.header[i] = "\\inputencoding auto"
 773     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 774
 775
 776 def convert_caption(document):
 777     " Convert caption layouts to caption insets. "
 778     i = 0
 779     while 1:
 780         i = find_token(document.body, "\\begin_layout Caption", i)
 781         if i == -1:
 782             return
 783         j = find_end_of_layout(document.body, i)
 784         if j == -1:
 785             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 786             return
 787
 788         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 789         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 790                             "\\begin_inset Caption", "",
 791                             "\\begin_layout %s" % document.default_layout]
 792         i = i + 1
 793
 794
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    # Works in place on document.body. Returns early (after a warning) as
    # soon as a structural token cannot be found.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remembered so that the layout can be reopened behind the inset.
        layout_line = document.body[layout_before]
        # The inset is first in the paragraph if only empty lines lie between
        # the \begin_layout line and the inset.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Remove \begin_layout and the empty lines; the inset line is now
            # at index layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # Close the preceding text; the inset line moves down by two.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is last in the paragraph if only empty lines lie between
        # \end_inset and the next \end_layout.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions run from the highest index (k) to the lowest (i) so
        # that the smaller indices remain valid.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
 867
 868
# Accents of InsetLaTeXAccent
# Maps the LaTeX accent command (without the leading backslash) to the
# corresponding unicode combining character.
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361'  # tie. This is special: It spans two characters, but
                     # only one is given as argument, so we don't need to
                     # treat it differently.
}
 890
 891
# special accents of InsetLaTeXAccent without argument
# Maps the command name (without the leading backslash) to the unicode
# character it stands for.
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141'  # L with stroke
}
 899
 900
# special accent arguments of InsetLaTeXAccent
# Maps a LaTeX command that is given as the accented character to the
# unicode character it stands for.
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237'  # dotless j
}
 906
 907
 908 def _convert_accent(accent, accented_char):
 909     type = accent
 910     char = accented_char
 911     if char == '':
 912         if type in special_accent_map:
 913             return special_accent_map[type]
 914         # a missing char is treated as space by LyX
 915         char = ' '
 916     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
 917         # Special caron, only used with t, d, l and L.
 918         # It is not in the map because we convert it to the same unicode
 919         # character as the normal caron: \q{} is only defined if babel with
 920         # the czech or slovak language is used, and the normal caron
 921         # produces the correct output if the T1 font encoding is used.
 922         # For the same reason we never convert to \q{} in the other direction.
 923         type = 'v'
 924     elif char in accented_map:
 925         char = accented_map[char]
 926     elif (len(char) > 1):
 927         # We can only convert accents on a single char
 928         return ''
 929     a = accent_map.get(type)
 930     if a:
 931         return unicodedata.normalize("NFC", "%s%s" % (char, a))
 932     return ''
 933
 934
 935 def convert_ertbackslash(body, i, ert, default_layout):
 936     r""" -------------------------------------------------------------------------------------------
 937     Convert backslashes and '\n' into valid ERT code, append the converted
 938     text to body[i] and return the (maybe incremented) line index i"""
 939
 940     for c in ert:
 941         if c == '\\':
 942             body[i] = body[i] + '\\backslash '
 943             i = i + 1
 944             body.insert(i, '')
 945         elif c == '\n':
 946             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
 947             i = i + 4
 948         else:
 949             body[i] = body[i] + c
 950     return i
 951
 952
 953 def convert_accent(document):
 954     # The following forms are supported by LyX:
 955     # '\i \"{a}' (standard form, as written by LyX)
 956     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
 957     # '\i \"{ }' (also accepted if the accented char is a space)
 958     # '\i \" a'  (also accepted)
 959     # '\i \"'    (also accepted)
 960     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
 961     re_contents = re.compile(r'^([^\s{]+)(.*)$')
 962     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
 963     i = 0
 964     while 1:
 965         i = find_re(document.body, re_wholeinset, i)
 966         if i == -1:
 967             return
 968         match = re_wholeinset.match(document.body[i])
 969         prefix = match.group(1)
 970         contents = match.group(3).strip()
 971         match = re_contents.match(contents)
 972         if match:
 973             # Strip first char (always \)
 974             accent = match.group(1)[1:]
 975             accented_contents = match.group(2).strip()
 976             match = re_accentedcontents.match(accented_contents)
 977             accented_char = match.group(1)
 978             converted = _convert_accent(accent, accented_char)
 979             if converted == '':
 980                 # Normalize contents
 981                 contents = '%s{%s}' % (accent, accented_char),
 982             else:
 983                 document.body[i] = '%s%s' % (prefix, converted)
 984                 i += 1
 985                 continue
 986         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
 987         document.body[i] = prefix
 988         document.body[i+1:i+1] = ['\\begin_inset ERT',
 989                                   'status collapsed',
 990                                   '',
 991                                   '\\begin_layout %s' % document.default_layout,
 992                                   '',
 993                                   '',
 994                                   '']
 995         i = convert_ertbackslash(document.body, i + 7,
 996                                  '\\%s' % contents,
 997                                  document.default_layout)
 998         document.body[i+1:i+1] = ['\\end_layout',
 999                                   '',
1000                                   '\\end_inset']
1001         i += 3
1002
1003
def revert_accent(document):
    """ Convert accented characters to InsetLaTeXAccent.

    Only characters that cannot be represented in the encoding of the
    surrounding text are converted. The body is temporarily normalized to
    NFD so that every accent is a separate combining character, and is
    normalized back to NFC at the end. """
    inverse_accent_map = dict((accent_map[k], k) for k in accent_map)
    inverse_special_accent_map = dict((special_accent_map[k], k)
                                      for k in special_accent_map)
    inverse_accented_map = dict((accented_map[k], k) for k in accented_map)

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # probably form a base character + combining accent sequence.
            # Move the rest of the word to this line.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
        except TypeError:
            # Python 2 byte string: decode it first
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    # The body grows while we work on it (the rest of a line behind a
    # converted accent becomes a new line), so the length has to be
    # re-evaluated in every iteration.
    i = 0
    while i < len(document.body):

        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i += 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i += 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i += 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line. It must be
                    # inserted as ONE element: assigning the string to a
                    # slice would add one line per character.
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j+1:])
                    # Delete the accented character (only the one at j)
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line (as one element,
                    # see above)
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j+1:])
                    # Delete the accented characters (base character at j-1
                    # and combining accent at j)
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    break
        i += 1
    # Normalize to "Normal form C" (NFC, pre-composed characters) again.
    # Use the current length: lines may have been inserted above.
    for i in range(len(document.body)):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
1112
1113
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside.

    Handles all font properties except the language; the work is done by
    normalize_font_whitespace()."""

    # property -> value that resets the property
    defaults = {}
    defaults["\\series"] = "default"
    defaults["\\emph"] = "default"
    defaults["\\color"] = "none"
    defaults["\\shape"] = "default"
    defaults["\\bar"] = "default"
    defaults["\\family"] = "default"
    return normalize_font_whitespace(document, defaults)
1126
def normalize_font_whitespace_274(document):
    """ Before format 259 (sic) the font changes were ignored if a
    whitespace was the first or last character in the sequence. This was
    corrected for most font properties in format 259, but the language
    was forgotten then. This function applies the same conversion done
    there (namely, transfers the whitespace outside) for font language
    changes, as well.

    The work is done by normalize_font_whitespace()."""

    # property -> value that resets the property
    defaults = {}
    defaults["\\lang"] = "default"
    return normalize_font_whitespace(document, defaults)
1137
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    # first line with content behind the \begin_layout line
    first = find_nonempty_line(body, layout_start + 1)
    tokens = body[first].split()

    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
1155
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function.

    char_properties maps a font property token (e.g. "\\series") to the
    value that resets it. If it contains "\\lang", that entry is overwritten
    with the paragraph language at every \\begin_layout. document.body is
    modified in place. Nothing is done unless the backend is latex."""

    if document.backend != "latex":
        return

    lines = document.body

    # Font properties that currently differ from their default: token -> value
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # added_lines is built around the " " line: resets of the
                # other changed properties go in front of it, their
                # restoration behind it.
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                # skip the lines that were just inserted
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = lines[i+1][1:]
                # same layout of added_lines as in the branch above
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                # skip the lines that were just inserted
                i = i + len(added_lines)

        i = i + 1
1245
1246
def revert_utf8x(document):
    """ Set utf8x encoding to utf8.

    A missing \\inputencoding line is added with the value `auto'.
    document.inputencoding is refreshed from the header afterwards. """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(document.header, "\\inputencoding", pos) == "utf8x":
            document.header[pos] = "\\inputencoding utf8"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1257
1258
def revert_utf8plain(document):
    """ Set utf8plain encoding to utf8.

    A missing \\inputencoding line is added with the value `auto'.
    document.inputencoding is refreshed from the header afterwards. """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(document.header, "\\inputencoding", pos) == "utf8-plain":
            document.header[pos] = "\\inputencoding utf8"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1269
1270
def revert_beamer_alert(document):
    """ Revert beamer's \\alert inset back to ERT.

    The inset is turned into an ERT inset and the line following its first
    \\begin_layout is wrapped in `\\alert{...}'. A warning is issued and the
    conversion stops if that line cannot be found. """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        # Look for the first layout behind the inset start, but do not run
        # past the end of the body if the document is malformed.
        n = len(document.body)
        i = i + 1
        while i < n and document.body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= n:
            document.warning("Malformed LyX document: Missing `\\begin_layout' in CharStyle Alert inset.")
            return
        # Insert the \alert command
        document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
        i = i + 1
1288
1289
def revert_beamer_structure(document):
    """ Revert beamer's \\structure inset back to ERT.

    The inset is turned into an ERT inset and the line following its first
    \\begin_layout is wrapped in `\\structure{...}'. A warning is issued and
    the conversion stops if that line cannot be found. """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        # Look for the first layout behind the inset start, but do not run
        # past the end of the body if the document is malformed.
        n = len(document.body)
        i = i + 1
        while i < n and document.body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= n:
            document.warning("Malformed LyX document: Missing `\\begin_layout' in CharStyle Structure inset.")
            return
        # Insert the \structure command
        document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
        i = i + 1
1306
1307
def convert_changes(document):
    """ Switch output_changes off if tracking_changes is off.

    Both settings must be present in the header, otherwise a warning is
    issued and nothing is changed. """
    track_pos = find_token(document.header, '\\tracking_changes', 0)
    if track_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_pos = find_token(document.header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_off = get_value(document.header, "\\tracking_changes", track_pos) == "false"
    output_on = get_value(document.header, "\\output_changes", output_pos) == "true"
    if tracking_off and output_on:
        document.header[output_pos] = "\\output_changes false"
1322
1323
def revert_ascii(document):
    """ Set ascii encoding to auto.

    A missing \\inputencoding line is added with the value `auto'.
    document.inputencoding is refreshed from the header afterwards. """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(document.header, "\\inputencoding", pos) == "ascii":
            document.header[pos] = "\\inputencoding auto"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1334
1335
def normalize_language_name(document):
    """ Replace the language names `brazil' and `portuges' by `brazilian'
    and `portuguese', both in document.language and in the header. """
    renamed = {"brazil": "brazilian",
               "portuges": "portuguese"}

    if document.language not in renamed:
        return
    document.language = renamed[document.language]
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % document.language
1344
1345
def revert_language_name(document):
    """ Replace the language names `brazilian' and `portuguese' by `brazil'
    and `portuges', both in document.language and in the header. """
    renamed = {"brazilian": "brazil",
               "portuguese": "portuges"}

    if document.language not in renamed:
        return
    document.language = renamed[document.language]
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % document.language
1354
1355 #
1356 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " Rename the text class cv to simplecv. "
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1360
1361
def revert_cv_textclass(document):
    " Rename the text class simplecv back to cv. "
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1365
1366
1367 #
1368 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """ add scaleBeforeRotation graphics parameter.

    The parameter is added to every graphics inset that has a rotateAngle
    and at least one of width, height or scale. """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without the end of the inset there is no range to examine
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            # Write the parameter with the leading tab of the other graphics
            # parameters; revert_graphics_rotation searches for it in
            # exactly this form.
            document.body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
1387
1388
1389 #
1390 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """ remove scaleBeforeRotation graphics parameter.

    If an inset has no scaleBeforeRotation parameter, but a rotateAngle
    together with width, height or scale, the angle is moved to the
    `special' parameter instead. """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without the end of the inset there is no range to examine
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
            i = i + 1
            continue
        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
        special = get_value(document.body, 'special', i + 1, j)
        has_size = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j) != -1
        # An inset without size parameters needs no change. Only this inset
        # is skipped: the following insets must still be processed.
        if rotateAngle != "" and has_size:
            if special == "":
                document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                # The inset is one line longer now; keep the last parameter
                # line inside the range searched below.
                j = j + 1
                converted = True
            else:
                l = find_token(document.body, "\tspecial", i + 1, j)
                if l != -1:
                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                    converted = True
                else:
                    document.warning("Malformed LyX document: Could not find special parameter of graphics inset.")
                    converted = False
            if converted:
                # The angle is part of special now
                k = find_token(document.body, "\trotateAngle", i + 1, j)
                if k != -1:
                    del document.body[k]
        i = i + 1
1424
1425
1426
def convert_tableborder(document):
    ''' Remove the "|" in front of ">{" in table lines that set leftline="true".

    The problem is: LyX doubles the table cell border as it ignores the "|"
    character in the cell arguments. A fix takes care of this and therefore
    the "|" has to be removed. Only the first "|>{" of a line is touched and
    the rest of the line is kept unchanged. '''
    for i, line in enumerate(document.body):
        # the two tokens have to be in one line
        if 'leftline="true"' not in line:
            continue
        k = line.find("|>{")
        if k != -1:
            # delete the "|" and keep everything behind it, including the
            # last character of the line
            document.body[i] = line[:k] + line[k + 1:]
1439
1440
def revert_tableborder(document):
    ''' Put a "|" in front of the first ">{" of every body line that also
    contains leftline="true". Other lines stay as they are. '''
    for n, text in enumerate(document.body):
        pos = text.find(">{")
        # both tokens have to show up in the same line
        if pos == -1 or text.find("leftline=\"true\"") == -1:
            continue
        # add the "|"
        document.body[n] = text[:pos] + '|' + text[pos:]
1451
1452
def revert_armenian(document):
    ''' Replace the armscii8 input encoding by auto and, for armenian
    documents, add \\usepackage{armtex} to the preamble and switch the
    document language to english. '''
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_line = find_token(document.header, "\\inputencoding", 0)
        if enc_line != -1:
            document.header[enc_line] = "\\inputencoding auto"

    # the preamble counts as existing once a line holds a backslash or a "%"
    preamble_exists = False
    for entry in document.preamble:
        if entry.find("\\") != -1 or entry.find("%") != -1:
            preamble_exists = True
            break

    if document.language != "armenian":
        return

    if preamble_exists:
        # set the armtex entry as the first preamble line
        document.preamble.insert(0, "\\usepackage{armtex}")
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    lang_line = find_token(document.header, "\\language", 0)
    if lang_line != -1:
        document.header[lang_line] = "\\language english"
1483
1484
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    cjk_encodings = ("Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP")
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")

    enc_line = find_token(document.header, "\\inputencoding", 0)
    if enc_line != -1:
        if get_value(document.header, "\\inputencoding", enc_line) in cjk_encodings:
            document.header[enc_line] = "\\inputencoding default"
    else:
        # no encoding line at all: add one
        document.header.append("\\inputencoding auto")
    # keep the attribute in step with whatever the header says now
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language not in cjk_languages:
        return
    document.language = "english"
    lang_line = find_token(document.header, "\\language", 0)
    if lang_line != -1:
        document.header[lang_line] = "\\language english"
1505
1506
def revert_preamble_listings_params(document):
    r''' Revert preamble option \listings_params

    The header line "\listings_params <value>" is removed. When it carries a
    value, \usepackage{listings} (unless already present) and
    \lstset{<value>} are appended to document.preamble. '''
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Everything behind the keyword is the value; splitting on every blank
    # would cut off parameters such as "language=C, numbers=left".
    fields = document.header[i].split(None, 1)
    params = ''
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    if params:
        # same duplicate check as the other listings revert functions
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1514
1515
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are moved into the listings
parameters (caption={...},label={...}). \usepackage{listings} is added to
document.preamble when a listings inset is found and the line is not
there yet. Processing stops at the first inset whose end cannot be found.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # first content line; stays right behind the inset start when no
        # status line is found, so that it is always defined
        k = i + 1
        # first three lines, but never beyond the end of this inset
        for line in range(i + 1, min(i + 4, j)):
            text = document.body[line]
            if text.startswith('inline'):
                inline = text.split()[1]
            if text.startswith('lstparams'):
                # take the whole rest of the line: the parameters may
                # contain blanks
                params = text[len('lstparams'):].strip().strip('"')
            if text.startswith('status'):
                status = text.split()[1].strip()
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # only look inside this inset, a caption further down belongs to
        # something else
        cap = find_token(document.body, '\\begin_inset Caption', i, j)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label? (only inside the caption)
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # caption text is everything around the label inset that is not
            # a LyX command line
            for line in document.body[cap : lbl] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            k = cap_end + 1
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            # looking for the oneline code for lstinline: the last line of
            # the first Standard layout inside this inset
            inlinecode = ''
            layout = find_token(document.body, '\\begin_layout Standard', i + 1, j)
            if layout != -1:
                layout_end = find_end_of_layout(document.body, layout + 1)
                if layout_end != -1:
                    inlinecode = document.body[layout_end - 1]
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout Standard',
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout Standard',
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout'
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout Standard',
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1663
1664
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

\usepackage{listings} is added to document.preamble when such an inset is
found and the line is not there yet. A command line that cannot be parsed
is reported through document.warning().
    '''

    # \lstinputlisting{<file>}<options>; the file name may hold anything
    # but a closing brace (path separators, dashes, blanks, ...)
    command_re = re.compile(r'\\(lstinputlisting)\{([^}]*)\}(.*)')
    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]; it is everything
        # behind "\begin_inset Include "
        cmd, filename, option = '', '', ''
        match = command_re.match(document.body[i].split(None, 2)[2])
        if match:
            cmd, filename, option = match.groups()
        else:
            document.warning("Malformed LyX document: Could not parse lstinputlisting command.")
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout Standard',
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1715
1716
def revert_ext_font_sizes(document):
    ''' For latex documents whose text class starts with "ext": turn a
    \\paperfontsize of 10, 11 or 12 into "default" and pass the size as
    class option (10pt, 11pt, 12pt) in the \\options header line. '''
    if document.backend != "latex":
        return
    if not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    class_option = size + 'pt'

    size_line = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_line] = '\\paperfontsize default'

    options_line = find_token(document.header, '\\options', 0)
    if options_line != -1:
        # extend the existing option list
        document.header[options_line] = document.header[options_line] + ',%s' % class_option
        return
    # no option line yet: create one right behind \textclass
    behind_class = find_token(document.header, '\\textclass', 0) + 1
    document.header.insert(behind_class, '\\options %s' % class_option)
1734
1735
def convert_ext_font_sizes(document):
    ''' For latex documents whose text class starts with "ext" and whose
    \\paperfontsize is "default": move a 10pt, 11pt or 12pt class option
    from the \\options header line into \\paperfontsize. An option line that
    becomes empty is removed. '''
    if document.backend != "latex":
        return
    if not document.textclass.startswith("ext"):
        return
    if get_value(document.header, '\\paperfontsize', 0) != 'default':
        return

    options_line = find_token(document.header, '\\options', 0)
    if options_line == -1:
        return

    known_sizes = ('10pt', '11pt', '12pt')
    class_options = get_value(document.header, '\\options', options_line).split(',')
    # position of the first option that is exactly one of the known sizes
    hits = [pos for pos, name in enumerate(class_options) if name in known_sizes]
    if not hits:
        return
    # drop the trailing "pt" for the header value
    size = class_options.pop(hits[0])[:-2]

    size_line = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_line] = '\\paperfontsize %s' % size

    if class_options:
        document.header[options_line] = '\\options %s' % ','.join(class_options)
    else:
        del document.header[options_line]
1771
1772
def revert_separator_layout(document):
    r'''Turn every --Separator-- layout into a Standard layout that starts
with a LyX note.

From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

    '''

    note_lines = [r'\begin_layout Standard',
                  r'\begin_inset Note Note',
                  'status open',
                  '',
                  r'\begin_layout Standard',
                  'Separate Environment',
                  r'\end_layout',
                  '',
                  r'\end_inset']
    closing_lines = ['', r'\end_layout']

    start = 0
    while True:
        start = find_token(document.body, r'\begin_layout --Separator--', start)
        if start == -1:
            break
        stop = find_end_of_layout(document.body, start + 1)
        if stop == -1:
            # this should not happen
            break
        # whatever stood between the layout markers is kept behind the note
        content = document.body[start + 1 : stop]
        document.body[start : stop + 1] = note_lines + content + closing_lines
1820
1821
def convert_arabic(document):
    ''' Rename the language "arabic" to "arabic_arabtex".

    The document language (document.language and the \\language header
    line) is changed, and every body line containing "\\lang arabic" is
    replaced by "\\lang arabic_arabtex". '''
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    # The backslash is written as "\\": "\l" is not a valid escape sequence
    # and only happened to be kept as it is.
    for i, line in enumerate(document.body):
        if "\\lang arabic" in line:
            # change the language name
            document.body[i] = '\\lang arabic_arabtex'
1835
1836
def revert_arabic(document):
    ''' Rename the language "arabic_arabtex" back to "arabic".

    The document language (document.language and the \\language header
    line) is changed, and every body line containing "\\lang arabic_arabtex"
    is replaced by "\\lang arabic". '''
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    # The backslash is written as "\\": "\l" is not a valid escape sequence
    # and only happened to be kept as it is.
    for i, line in enumerate(document.body):
        if "\\lang arabic_arabtex" in line:
            # change the language name
            document.body[i] = '\\lang arabic'
1850
1851
def read_unicodesymbols():
    ''' Read the unicodesymbols list of unicode characters and corresponding commands.

    The file "unicodesymbols" is looked up relative to the directory of the
    running script (sys.argv[0]): in its parent directory when the script
    directory is called "lyx2lyx", in the script directory itself otherwise.

    Returns a dictionary that maps each character to the list
    [command, flag1, flag2]. Comment lines and lines that do not have the
    expected fields are skipped. '''
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # str.strip('lyx2lyx') removes any of the characters l, y, x and 2 from
    # both ends of the path, not the directory name, so test the name itself.
    if os.path.basename(pathname) == 'lyx2lyx':
        pathname = os.path.dirname(pathname)
    # unichr only exists on Python 2; chr does the same job on Python 3
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    spec_chars = {}
    fp = open(os.path.join(pathname, 'unicodesymbols'))
    try:
        for line in fp.readlines():
            if line[0] != '#':
                line=line.replace(' "',' ') # remove all quotation marks with spaces before
                line=line.replace('" ',' ') # remove all quotation marks with spaces after
                line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
                try:
                    # flag1 and flag2 are preamble and other flags
                    [ucs4,command,flag1,flag2] =line.split(None,3)
                    # base 0 accepts the 0x... notation without evaluating
                    # file content as Python code
                    spec_chars[to_char(int(ucs4, 0))] = [command, flag1, flag2]
                except (ValueError, OverflowError):
                    # too few fields or not a usable code point: skip the line
                    pass
    finally:
        fp.close()

    return spec_chars
1871
1872
def revert_unicode(document):
    '''Transform unicode characters that can not be written using the
document encoding to commands according to the unicodesymbols
file. Characters that can not be replaced by commands are replaced by
a replacement string.  Flags other than 'combining' are currently not
implemented.

document.body is replaced by the converted lines; document.encoding names
the codec every line and character is tested against.'''

    if not document.body:
        # nothing to convert
        return

    replacement_character = '???'
    spec_chars = read_unicodesymbols()

    # Define strings to start and end ERT and math insets
    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
    ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset\n'
    # Find unicode characters and replace them
    in_ert = False # flag set to True if in ERT inset
    in_math = False # flag set to True if in math inset
    insets = [] # list of active insets
    mod_body = u'' # to store the modified document body
    last_char = '' # to store the previous character

    # Entries holding embedded newlines are handled as several lines.
    for line in u'\n'.join(document.body).split('\n'):
        # Check for insets
        if line.find('\\begin_inset') > -1:
            # check which inset to start
            if line.find('\\begin_inset ERT') > -1:
                in_ert = True
                insets.append('ert')
            elif line.find('\\begin_inset Formula') > -1:
                in_math = True
                insets.append('math')
            else:
                insets.append('other')
        if line.find('\\end_inset') > -1:
            # check which inset to end; the list may be empty (for some reason)
            if insets:
                cur_inset = insets.pop()
                if cur_inset == 'ert':
                    in_ert = False
                elif cur_inset == 'math':
                    in_math = False

        # Encoding is only used as a test here, the result is not needed.
        try:
            line.encode(document.encoding)
        except UnicodeError:
            # some character(s) in the line need to be replaced, see below
            pass
        else:
            # If all goes well the line is taken over as it is
            mod_body = mod_body + line + '\n'
            if line:
                last_char = line[-1]
            continue

        for character in line:
            try:
                character.encode(document.encoding)
            except UnicodeError:
                pass
            else:
                mod_body = mod_body + character
                last_char = character
                continue

            if character not in spec_chars:
                # Replace with replacement string
                mod_body = mod_body + replacement_character
                continue

            # Try to replace with ERT/math inset
            command, flag1, flag2 = spec_chars[character]
            if flag1.find('combining') > -1 or flag2.find('combining') > -1:
                # We have a character that should be combined with the previous
                command = command + '{' +last_char + '}'
                # Remove the last character. Ignore if it is whitespace or if
                # the last character was replaced by a command (empty
                # last_char). This could be handled better.
                if len(last_char.rstrip()) > 0:
                    mod_body = mod_body.rsplit(last_char, 1)[0]
            if command[0:2] == '\\\\':
                if command[2:12]=='ensuremath':
                    if in_ert == True:
                        # math in ERT
                        command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                        command = command.replace('}', '$\n')
                    elif in_math == False:
                        # add a math inset with the replacement character
                        command = command.replace('\\\\ensuremath{\\', math_intro)
                        command = command.replace('}', math_outro)
                    else:
                        # we are already in a math inset
                        command = command.replace('\\\\ensuremath{\\', '')
                        command = command.replace('}', '')
                else:
                    if in_math == True:
                        # avoid putting an ERT in a math; instead put command as text
                        command = command.replace('\\\\', '\\mathrm{')
                        command = command + '}'
                    elif in_ert == False:
                        # add an ERT inset with the replacement character
                        command = command.replace('\\\\', ert_intro)
                        command = command + ert_outro
                    else:
                        command = command.replace('\\\\', '\n\\backslash\n')
                last_char = '' # indicate that the character should not be removed
            mod_body = mod_body + command

        # End the rewritten line, otherwise its tail is glued to the
        # beginning of the next line.
        mod_body = mod_body + '\n'

    # drop the newline behind the last line before splitting
    document.body = mod_body.rsplit('\n', 1)[0].split('\n')
1992
1993
##
# Conversion hub
#
# Both tables below are lists of [number, [functions]] pairs. The functions
# are the conversion routines defined in this file; each takes the document
# as its only argument.
# NOTE(review): the numbers look like LyX file format numbers, i.e. the
# format reached once the listed functions have run - confirm against the
# lyx2lyx driver that consumes these tables.

supported_versions = ["1.5.0","1.5"]
# Forward direction: numbers ascend from 246 to 276. An empty function list
# means that no routine is registered for that step.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Backward direction: numbers descend from 275 to 245. The three listings
# routines are registered twice, for 271 and for 268.
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


# Running this file on its own does nothing.
if __name__ == "__main__":
    pass


2068
2069