lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     if document.cjk_encoding != '':
 250         return
 251     encoding_stack = [document.encoding]
 252     lang_re = re.compile(r"^\\lang\s(\S+)")
 253     if document.inputencoding == "auto" or document.inputencoding == "default":
 254         for i in range(len(document.body)):
 255             result = lang_re.match(document.body[i])
 256             if result:
 257                 language = result.group(1)
 258                 if language == "default":
 259                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 260                     encoding_stack[-1] = document.encoding
 261                 else:
 262                     from lyx2lyx_lang import lang
 263                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 264                     encoding_stack[-1] = lang[language][3]
 265             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 266                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 267                 encoding_stack.append(encoding_stack[-1])
 268             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 269                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 270                 if len(encoding_stack) == 1:
 271                     # Don't remove the document encoding from the stack
 272                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 273                 else:
 274                     del encoding_stack[-1]
 275             if encoding_stack[-1] != document.encoding:
 276                 if forward:
 277                     # This line has been incorrectly interpreted as if it was
 278                     # encoded in 'encoding'.
 279                     # Convert back to the 8bit string that was in the file.
 280                     orig = document.body[i].encode(document.encoding)
 281                     # Convert the 8bit string that was in the file to unicode
 282                     # with the correct encoding.
 283                     document.body[i] = orig.decode(encoding_stack[-1])
 284                 else:
 285                     # Convert unicode to the 8bit string that will be written
 286                     # to the file with the correct encoding.
 287                     orig = document.body[i].encode(encoding_stack[-1])
 288                     # Convert the 8bit string that will be written to the
 289                     # file to fake unicode with the encoding that will later
 290                     # be used when writing to the file.
 291                     document.body[i] = orig.decode(document.encoding)
 292
 293
 294 def convert_utf8(document):
 295     " Set document encoding to UTF-8. "
 296     convert_multiencoding(document, True)
 297     document.encoding = "utf8"
 298
 299
 300 def revert_utf8(document):
 301     " Set document encoding to the value corresponding to inputencoding. "
 302     i = find_token(document.header, "\\inputencoding", 0)
 303     if i == -1:
 304         document.header.append("\\inputencoding auto")
 305     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 306         document.header[i] = "\\inputencoding auto"
 307     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 308     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 309     convert_multiencoding(document, False)
 310
 311
 312 def revert_cs_label(document):
 313     " Remove status flag of charstyle label. "
 314     i = 0
 315     while 1:
 316         i = find_token(document.body, "\\begin_inset CharStyle", i)
 317         if i == -1:
 318             return
 319         # Seach for a line starting 'show_label'
 320         # If it is not there, break with a warning message
 321         i = i + 1
 322         while 1:
 323             if (document.body[i][:10] == "show_label"):
 324                 del document.body[i]
 325                 break
 326             elif (document.body[i][:13] == "\\begin_layout"):
 327                 document.warning("Malformed LyX document: Missing 'show_label'.")
 328                 break
 329             i = i + 1
 330
 331         i = i + 1
 332
 333
 334 def convert_bibitem(document):
 335     """ Convert
 336 \bibitem [option]{argument}
 337
 338 to
 339
 340 \begin_inset LatexCommand bibitem
 341 label "option"
 342 key "argument"
 343
 344 \end_inset
 345
 346 This must be called after convert_commandparams.
 347 """
 348     i = 0
 349     while 1:
 350         i = find_token(document.body, "\\bibitem", i)
 351         if i == -1:
 352             break
 353         j = document.body[i].find('[') + 1
 354         k = document.body[i].rfind(']')
 355         if j == 0: # No optional argument found
 356             option = None
 357         else:
 358             option = document.body[i][j:k]
 359         j = document.body[i].rfind('{') + 1
 360         k = document.body[i].rfind('}')
 361         argument = document.body[i][j:k]
 362         lines = ['\\begin_inset LatexCommand bibitem']
 363         if option != None:
 364             lines.append('label "%s"' % option.replace('"', '\\"'))
 365         lines.append('key "%s"' % argument.replace('"', '\\"'))
 366         lines.append('')
 367         lines.append('\\end_inset')
 368         document.body[i:i+1] = lines
 369         i = i + 1
 370
 371
 372 commandparams_info = {
 373     # command : [option1, option2, argument]
 374     "bibitem" : ["label", "", "key"],
 375     "bibtex" : ["options", "btprint", "bibfiles"],
 376     "cite"        : ["after", "before", "key"],
 377     "citet"       : ["after", "before", "key"],
 378     "citep"       : ["after", "before", "key"],
 379     "citealt"     : ["after", "before", "key"],
 380     "citealp"     : ["after", "before", "key"],
 381     "citeauthor"  : ["after", "before", "key"],
 382     "citeyear"    : ["after", "before", "key"],
 383     "citeyearpar" : ["after", "before", "key"],
 384     "citet*"      : ["after", "before", "key"],
 385     "citep*"      : ["after", "before", "key"],
 386     "citealt*"    : ["after", "before", "key"],
 387     "citealp*"    : ["after", "before", "key"],
 388     "citeauthor*" : ["after", "before", "key"],
 389     "Citet"       : ["after", "before", "key"],
 390     "Citep"       : ["after", "before", "key"],
 391     "Citealt"     : ["after", "before", "key"],
 392     "Citealp"     : ["after", "before", "key"],
 393     "Citeauthor"  : ["after", "before", "key"],
 394     "Citet*"      : ["after", "before", "key"],
 395     "Citep*"      : ["after", "before", "key"],
 396     "Citealt*"    : ["after", "before", "key"],
 397     "Citealp*"    : ["after", "before", "key"],
 398     "Citeauthor*" : ["after", "before", "key"],
 399     "citefield"   : ["after", "before", "key"],
 400     "citetitle"   : ["after", "before", "key"],
 401     "cite*"       : ["after", "before", "key"],
 402     "hfill" : ["", "", ""],
 403     "index"      : ["", "", "name"],
 404     "printindex" : ["", "", "name"],
 405     "label" : ["", "", "name"],
 406     "eqref"     : ["name", "", "reference"],
 407     "pageref"   : ["name", "", "reference"],
 408     "prettyref" : ["name", "", "reference"],
 409     "ref"       : ["name", "", "reference"],
 410     "vpageref"  : ["name", "", "reference"],
 411     "vref"      : ["name", "", "reference"],
 412     "tableofcontents" : ["", "", "type"],
 413     "htmlurl" : ["name", "", "target"],
 414     "url"     : ["name", "", "target"]}
 415
 416
 417 def convert_commandparams(document):
 418     """ Convert
 419
 420  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 421  \end_inset
 422
 423  to
 424
 425  \begin_inset LatexCommand cmdname
 426  name1 "opt1"
 427  name2 "opt2"
 428  name3 "arg"
 429  \end_inset
 430
 431  name1, name2 and name3 can be different for each command.
 432 """
 433     # \begin_inset LatexCommand bibitem was not the official version (see
 434     # convert_bibitem()), but could be read in, so we convert it here, too.
 435
 436     i = 0
 437     while 1:
 438         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 439         if i == -1:
 440             break
 441         command = document.body[i][26:].strip()
 442         if command == "":
 443             document.warning("Malformed LyX document: Missing LatexCommand name.")
 444             i = i + 1
 445             continue
 446
 447         j = find_token(document.body, "\\end_inset", i + 1)
 448         if j == -1:
 449             document.warning("Malformed document")
 450         else:
 451             command += "".join(document.body[i+1:j])
 452             document.body[i+1:j] = []
 453
 454         # The following parser is taken from the original InsetCommandParams::scanCommand
 455         name = ""
 456         option1 = ""
 457         option2 = ""
 458         argument = ""
 459         state = "WS"
 460         # Used to handle things like \command[foo[bar]]{foo{bar}}
 461         nestdepth = 0
 462         b = 0
 463         for c in command:
 464             if ((state == "CMDNAME" and c == ' ') or
 465                 (state == "CMDNAME" and c == '[') or
 466                 (state == "CMDNAME" and c == '{')):
 467                 state = "WS"
 468             if ((state == "OPTION" and c == ']') or
 469                 (state == "SECOPTION" and c == ']') or
 470                 (state == "CONTENT" and c == '}')):
 471                 if nestdepth == 0:
 472                     state = "WS"
 473                 else:
 474                     nestdepth = nestdepth - 1
 475             if ((state == "OPTION" and c == '[') or
 476                 (state == "SECOPTION" and c == '[') or
 477                 (state == "CONTENT" and c == '{')):
 478                 nestdepth = nestdepth + 1
 479             if state == "CMDNAME":
 480                     name += c
 481             elif state == "OPTION":
 482                     option1 += c
 483             elif state == "SECOPTION":
 484                     option2 += c
 485             elif state == "CONTENT":
 486                     argument += c
 487             elif state == "WS":
 488                 if c == '\\':
 489                     state = "CMDNAME"
 490                 elif c == '[' and b != ']':
 491                     state = "OPTION"
 492                     nestdepth = 0 # Just to be sure
 493                 elif c == '[' and b == ']':
 494                     state = "SECOPTION"
 495                     nestdepth = 0 # Just to be sure
 496                 elif c == '{':
 497                     state = "CONTENT"
 498                     nestdepth = 0 # Just to be sure
 499             b = c
 500
 501         # Now we have parsed the command, output the parameters
 502         lines = ["\\begin_inset LatexCommand %s" % name]
 503         if option1 != "":
 504             if commandparams_info[name][0] == "":
 505                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 506             else:
 507                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 508         if option2 != "":
 509             if commandparams_info[name][1] == "":
 510                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 511             else:
 512                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 513         if argument != "":
 514             if commandparams_info[name][2] == "":
 515                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 516             else:
 517                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 518         document.body[i:i+1] = lines
 519         i = i + 1
 520
 521
 522 def revert_commandparams(document):
 523     regex = re.compile(r'(\S+)\s+(.+)')
 524     i = 0
 525     while 1:
 526         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 527         if i == -1:
 528             break
 529         name = document.body[i].split()[2]
 530         j = find_end_of_inset(document.body, i + 1)
 531         preview_line = ""
 532         option1 = ""
 533         option2 = ""
 534         argument = ""
 535         for k in range(i + 1, j):
 536             match = re.match(regex, document.body[k])
 537             if match:
 538                 pname = match.group(1)
 539                 pvalue = match.group(2)
 540                 if pname == "preview":
 541                     preview_line = document.body[k]
 542                 elif (commandparams_info[name][0] != "" and
 543                       pname == commandparams_info[name][0]):
 544                     option1 = pvalue.strip('"').replace('\\"', '"')
 545                 elif (commandparams_info[name][1] != "" and
 546                       pname == commandparams_info[name][1]):
 547                     option2 = pvalue.strip('"').replace('\\"', '"')
 548                 elif (commandparams_info[name][2] != "" and
 549                       pname == commandparams_info[name][2]):
 550                     argument = pvalue.strip('"').replace('\\"', '"')
 551             elif document.body[k].strip() != "":
 552                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 553         if name == "bibitem":
 554             if option1 == "":
 555                 lines = ["\\bibitem {%s}" % argument]
 556             else:
 557                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 558         else:
 559             if option1 == "":
 560                 if option2 == "":
 561                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 562                 else:
 563                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 564             else:
 565                 if option2 == "":
 566                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 567                 else:
 568                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 569         if name != "bibitem":
 570             if preview_line != "":
 571                 lines.append(preview_line)
 572             lines.append('')
 573             lines.append('\\end_inset')
 574         document.body[i:j+1] = lines
 575         i = j + 1
 576
 577
 578 def revert_nomenclature(document):
 579     " Convert nomenclature entry to ERT. "
 580     regex = re.compile(r'(\S+)\s+(.+)')
 581     i = 0
 582     use_nomencl = 0
 583     while 1:
 584         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 585         if i == -1:
 586             break
 587         use_nomencl = 1
 588         j = find_end_of_inset(document.body, i + 1)
 589         preview_line = ""
 590         symbol = ""
 591         description = ""
 592         prefix = ""
 593         for k in range(i + 1, j):
 594             match = re.match(regex, document.body[k])
 595             if match:
 596                 name = match.group(1)
 597                 value = match.group(2)
 598                 if name == "preview":
 599                     preview_line = document.body[k]
 600                 elif name == "symbol":
 601                     symbol = value.strip('"').replace('\\"', '"')
 602                 elif name == "description":
 603                     description = value.strip('"').replace('\\"', '"')
 604                 elif name == "prefix":
 605                     prefix = value.strip('"').replace('\\"', '"')
 606             elif document.body[k].strip() != "":
 607                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 608         if prefix == "":
 609             command = 'nomenclature{%s}{%s}' % (symbol, description)
 610         else:
 611             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 612         document.body[i:j+1] = ['\\begin_inset ERT',
 613                                 'status collapsed',
 614                                 '',
 615                                 '\\begin_layout %s' % document.default_layout,
 616                                 '',
 617                                 '',
 618                                 '\\backslash',
 619                                 command,
 620                                 '\\end_layout',
 621                                 '',
 622                                 '\\end_inset']
 623         i = i + 11
 624     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 625         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 626         document.preamble.append('\\makenomenclature')
 627
 628
 629 def revert_printnomenclature(document):
 630     " Convert printnomenclature to ERT. "
 631     regex = re.compile(r'(\S+)\s+(.+)')
 632     i = 0
 633     use_nomencl = 0
 634     while 1:
 635         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 636         if i == -1:
 637             break
 638         use_nomencl = 1
 639         j = find_end_of_inset(document.body, i + 1)
 640         preview_line = ""
 641         labelwidth = ""
 642         for k in range(i + 1, j):
 643             match = re.match(regex, document.body[k])
 644             if match:
 645                 name = match.group(1)
 646                 value = match.group(2)
 647                 if name == "preview":
 648                     preview_line = document.body[k]
 649                 elif name == "labelwidth":
 650                     labelwidth = value.strip('"').replace('\\"', '"')
 651             elif document.body[k].strip() != "":
 652                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 653         if labelwidth == "":
 654             command = 'nomenclature{}'
 655         else:
 656             command = 'nomenclature[%s]' % labelwidth
 657         document.body[i:j+1] = ['\\begin_inset ERT',
 658                                 'status collapsed',
 659                                 '',
 660                                 '\\begin_layout %s' % document.default_layout,
 661                                 '',
 662                                 '',
 663                                 '\\backslash',
 664                                 command,
 665                                 '\\end_layout',
 666                                 '',
 667                                 '\\end_inset']
 668         i = i + 11
 669     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 670         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 671         document.preamble.append('\\makenomenclature')
 672
 673
 674 def convert_esint(document):
 675     " Add \\use_esint setting to header. "
 676     i = find_token(document.header, "\\cite_engine", 0)
 677     if i == -1:
 678         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 679         return
 680     # 0 is off, 1 is auto, 2 is on.
 681     document.header.insert(i, '\\use_esint 0')
 682
 683
 684 def revert_esint(document):
 685     " Remove \\use_esint setting from header. "
 686     i = find_token(document.header, "\\use_esint", 0)
 687     if i == -1:
 688         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 689         return
 690     use_esint = document.header[i].split()[1]
 691     del document.header[i]
 692     # 0 is off, 1 is auto, 2 is on.
 693     if (use_esint == 2):
 694         document.preamble.append('\\usepackage{esint}')
 695
 696
 697 def revert_clearpage(document):
 698     " clearpage -> ERT "
 699     i = 0
 700     while 1:
 701         i = find_token(document.body, "\\clearpage", i)
 702         if i == -1:
 703             break
 704         document.body[i:i+1] =  ['\\begin_inset ERT',
 705                                 'status collapsed',
 706                                 '',
 707                                 '\\begin_layout %s' % document.default_layout,
 708                                 '',
 709                                 '',
 710                                 '\\backslash',
 711                                 'clearpage',
 712                                 '\\end_layout',
 713                                 '',
 714                                 '\\end_inset']
 715     i = i + 1
 716
 717
 718 def revert_cleardoublepage(document):
 719     " cleardoublepage -> ERT "
 720     i = 0
 721     while 1:
 722         i = find_token(document.body, "\\cleardoublepage", i)
 723         if i == -1:
 724             break
 725         document.body[i:i+1] =  ['\\begin_inset ERT',
 726                                 'status collapsed',
 727                                 '',
 728                                 '\\begin_layout %s' % document.default_layout,
 729                                 '',
 730                                 '',
 731                                 '\\backslash',
 732                                 'cleardoublepage',
 733                                 '\\end_layout',
 734                                 '',
 735                                 '\\end_inset']
 736     i = i + 1
 737
 738
 739 def convert_lyxline(document):
 740     " remove fontsize commands for \lyxline "
 741     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 742     # to change the line thickness. The new definiton does this so that imported
 743     # \lyxlines would have a different line thickness. The eventual fontsize command
 744     # before \lyxline is therefore removed to get the same output.
 745     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 746                  "large", "Large", "LARGE", "huge", "Huge"]
 747     for n in range(0, len(fontsizes)):
 748         i = 0
 749         k = 0
 750         while i < len(document.body):
 751             i = find_token(document.body, "\\size " + fontsizes[n], i)
 752             k = find_token(document.body, "\\lyxline", i)
 753             # the corresponding fontsize command is always 2 lines before the \lyxline
 754             if (i != -1 and k == i+2):
 755                 document.body[i:i+1] = []
 756             else:
 757                 break
 758         i = i + 1
 759
 760
 761 def revert_encodings(document):
 762     " Set new encodings to auto. "
 763     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 764                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 765                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 766     i = find_token(document.header, "\\inputencoding", 0)
 767     if i == -1:
 768         document.header.append("\\inputencoding auto")
 769     else:
 770         inputenc = get_value(document.header, "\\inputencoding", i)
 771         if inputenc in encodings:
 772             document.header[i] = "\\inputencoding auto"
 773     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 774
 775
 776 def convert_caption(document):
 777     " Convert caption layouts to caption insets. "
 778     i = 0
 779     while 1:
 780         i = find_token(document.body, "\\begin_layout Caption", i)
 781         if i == -1:
 782             return
 783         j = find_end_of_layout(document.body, i)
 784         if j == -1:
 785             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 786             return
 787
 788         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 789         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 790                             "\\begin_inset Caption", "",
 791                             "\\begin_layout %s" % document.default_layout]
 792         i = i + 1
 793
 794
 795 def revert_caption(document):
 796     " Convert caption insets to caption layouts. "
 797     " This assumes that the text class has a caption style. "
 798     i = 0
 799     while 1:
 800         i = find_token(document.body, "\\begin_inset Caption", i)
 801         if i == -1:
 802             return
 803
 804         # We either need to delete the previous \begin_layout line, or we
 805         # need to end the previous layout if this inset is not in the first
 806         # position of the paragraph.
 807         layout_before = find_token_backwards(document.body, "\\begin_layout", i)
 808         if layout_before == -1:
 809             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 810             return
 811         layout_line = document.body[layout_before]
 812         del_layout_before = True
 813         l = layout_before + 1
 814         while l < i:
 815             if document.body[l] != "":
 816                 del_layout_before = False
 817                 break
 818             l = l + 1
 819         if del_layout_before:
 820             del document.body[layout_before:i]
 821             i = layout_before
 822         else:
 823             document.body[i:i] = ["\\end_layout", ""]
 824             i = i + 2
 825
 826         # Find start of layout in the inset and end of inset
 827         j = find_token(document.body, "\\begin_layout", i)
 828         if j == -1:
 829             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 830             return
 831         k = find_end_of_inset(document.body, i)
 832         if k == -1:
 833             document.warning("Malformed LyX document: Missing `\\end_inset'.")
 834             return
 835
 836         # We either need to delete the following \end_layout line, or we need
 837         # to restart the old layout if this inset is not at the paragraph end.
 838         layout_after = find_token(document.body, "\\end_layout", k)
 839         if layout_after == -1:
 840             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 841             return
 842         del_layout_after = True
 843         l = k + 1
 844         while l < layout_after:
 845             if document.body[l] != "":
 846                 del_layout_after = False
 847                 break
 848             l = l + 1
 849         if del_layout_after:
 850             del document.body[k+1:layout_after+1]
 851         else:
 852             document.body[k+1:k+1] = [layout_line, ""]
 853
 854         # delete \begin_layout and \end_inset and replace \begin_inset with
 855         # "\begin_layout Caption". This works because we can only have one
 856         # paragraph in the caption inset: The old \end_layout will be recycled.
 857         del document.body[k]
 858         if document.body[k] == "":
 859             del document.body[k]
 860         del document.body[j]
 861         if document.body[j] == "":
 862             del document.body[j]
 863         document.body[i] = "\\begin_layout Caption"
 864         if document.body[i+1] == "":
 865             del document.body[i+1]
 866         i = i + 1
 867
 868
 869 # Accents of InsetLaTeXAccent
 870 accent_map = {
 871     "`" : u'\u0300', # grave
 872     "'" : u'\u0301', # acute
 873     "^" : u'\u0302', # circumflex
 874     "~" : u'\u0303', # tilde
 875     "=" : u'\u0304', # macron
 876     "u" : u'\u0306', # breve
 877     "." : u'\u0307', # dot above
 878     "\"": u'\u0308', # diaeresis
 879     "r" : u'\u030a', # ring above
 880     "H" : u'\u030b', # double acute
 881     "v" : u'\u030c', # caron
 882     "b" : u'\u0320', # minus sign below
 883     "d" : u'\u0323', # dot below
 884     "c" : u'\u0327', # cedilla
 885     "k" : u'\u0328', # ogonek
 886     "t" : u'\u0361'  # tie. This is special: It spans two characters, but
 887                      # only one is given as argument, so we don't need to
 888                      # treat it differently.
 889 }
 890
 891
 892 # special accents of InsetLaTeXAccent without argument
 893 special_accent_map = {
 894     'i' : u'\u0131', # dotless i
 895     'j' : u'\u0237', # dotless j
 896     'l' : u'\u0142', # l with stroke
 897     'L' : u'\u0141'  # L with stroke
 898 }
 899
 900
 901 # special accent arguments of InsetLaTeXAccent
 902 accented_map = {
 903     '\\i' : u'\u0131', # dotless i
 904     '\\j' : u'\u0237'  # dotless j
 905 }
 906
 907
 908 def _convert_accent(accent, accented_char):
 909     type = accent
 910     char = accented_char
 911     if char == '':
 912         if type in special_accent_map:
 913             return special_accent_map[type]
 914         # a missing char is treated as space by LyX
 915         char = ' '
 916     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
 917         # Special caron, only used with t, d, l and L.
 918         # It is not in the map because we convert it to the same unicode
 919         # character as the normal caron: \q{} is only defined if babel with
 920         # the czech or slovak language is used, and the normal caron
 921         # produces the correct output if the T1 font encoding is used.
 922         # For the same reason we never convert to \q{} in the other direction.
 923         type = 'v'
 924     elif char in accented_map:
 925         char = accented_map[char]
 926     elif (len(char) > 1):
 927         # We can only convert accents on a single char
 928         return ''
 929     a = accent_map.get(type)
 930     if a:
 931         return unicodedata.normalize("NFC", "%s%s" % (char, a))
 932     return ''
 933
 934
 935 def convert_ertbackslash(body, i, ert, default_layout):
 936     r""" -------------------------------------------------------------------------------------------
 937     Convert backslashes and '\n' into valid ERT code, append the converted
 938     text to body[i] and return the (maybe incremented) line index i"""
 939
 940     for c in ert:
 941         if c == '\\':
 942             body[i] = body[i] + '\\backslash '
 943             i = i + 1
 944             body.insert(i, '')
 945         elif c == '\n':
 946             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
 947             i = i + 4
 948         else:
 949             body[i] = body[i] + c
 950     return i
 951
 952
 953 def convert_accent(document):
 954     # The following forms are supported by LyX:
 955     # '\i \"{a}' (standard form, as written by LyX)
 956     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
 957     # '\i \"{ }' (also accepted if the accented char is a space)
 958     # '\i \" a'  (also accepted)
 959     # '\i \"'    (also accepted)
 960     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
 961     re_contents = re.compile(r'^([^\s{]+)(.*)$')
 962     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
 963     i = 0
 964     while 1:
 965         i = find_re(document.body, re_wholeinset, i)
 966         if i == -1:
 967             return
 968         match = re_wholeinset.match(document.body[i])
 969         prefix = match.group(1)
 970         contents = match.group(3).strip()
 971         match = re_contents.match(contents)
 972         if match:
 973             # Strip first char (always \)
 974             accent = match.group(1)[1:]
 975             accented_contents = match.group(2).strip()
 976             match = re_accentedcontents.match(accented_contents)
 977             accented_char = match.group(1)
 978             converted = _convert_accent(accent, accented_char)
 979             if converted == '':
 980                 # Normalize contents
 981                 contents = '%s{%s}' % (accent, accented_char),
 982             else:
 983                 document.body[i] = '%s%s' % (prefix, converted)
 984                 i += 1
 985                 continue
 986         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
 987         document.body[i] = prefix
 988         document.body[i+1:i+1] = ['\\begin_inset ERT',
 989                                   'status collapsed',
 990                                   '',
 991                                   '\\begin_layout %s' % document.default_layout,
 992                                   '',
 993                                   '',
 994                                   '']
 995         i = convert_ertbackslash(document.body, i + 7,
 996                                  '\\%s' % contents,
 997                                  document.default_layout)
 998         document.body[i+1:i+1] = ['\\end_layout',
 999                                   '',
1000                                   '\\end_inset']
1001         i += 3
1002
1003
1004 def revert_accent(document):
1005     inverse_accent_map = {}
1006     for k in accent_map:
1007         inverse_accent_map[accent_map[k]] = k
1008     inverse_special_accent_map = {}
1009     for k in special_accent_map:
1010         inverse_special_accent_map[special_accent_map[k]] = k
1011     inverse_accented_map = {}
1012     for k in accented_map:
1013         inverse_accented_map[accented_map[k]] = k
1014
1015     # Since LyX may insert a line break within a word we must combine all
1016     # words before unicode normalization.
1017     # We do this only if the next line starts with an accent, otherwise we
1018     # would create things like '\begin_inset ERTstatus'.
1019     numberoflines = len(document.body)
1020     for i in range(numberoflines-1):
1021         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1022             continue
1023         if (document.body[i+1][0] in inverse_accent_map):
1024             # the last character of this line and the first of the next line
1025             # form probably a surrogate pair.
1026             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1027                 document.body[i] += document.body[i+1][0]
1028                 document.body[i+1] = document.body[i+1][1:]
1029
1030     # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1031     # This is needed to catch all accented characters.
1032     for i in range(numberoflines):
1033         # Unfortunately we have a mixture of unicode strings and plain strings,
1034         # because we never use u'xxx' for string literals, but 'xxx'.
1035         # Therefore we may have to try two times to normalize the data.
1036         try:
1037             document.body[i] = unicodedata.normalize("NFD", document.body[i])
1038         except TypeError:
1039             document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
1040
1041     # Replace accented characters with InsetLaTeXAccent
1042     # Do not convert characters that can be represented in the chosen
1043     # encoding.
1044     encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1045     lang_re = re.compile(r"^\\lang\s(\S+)")
1046     i = 0
1047     while i < len(document.body):
1048
1049         if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1050             # Track the encoding of the current line
1051             result = lang_re.match(document.body[i])
1052             if result:
1053                 language = result.group(1)
1054                 if language == "default":
1055                     encoding_stack[-1] = document.encoding
1056                 else:
1057                     from lyx2lyx_lang import lang
1058                     encoding_stack[-1] = lang[language][3]
1059                 continue
1060             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1061                 encoding_stack.append(encoding_stack[-1])
1062                 continue
1063             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1064                 del encoding_stack[-1]
1065                 continue
1066
1067         for j in range(len(document.body[i])):
1068             # dotless i and dotless j are both in special_accent_map and can
1069             # occur as an accented character, so we need to test that the
1070             # following character is no accent
1071             if (document.body[i][j] in inverse_special_accent_map and
1072                 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1073                 accent = document.body[i][j]
1074                 try:
1075                     dummy = accent.encode(encoding_stack[-1])
1076                 except UnicodeEncodeError:
1077                     # Insert the rest of the line as new line
1078                     if j < len(document.body[i]) - 1:
1079                         document.body.insert(i+1, document.body[i][j+1:])
1080                     # Delete the accented character
1081                     if j > 0:
1082                         document.body[i] = document.body[i][:j-1]
1083                     else:
1084                         document.body[i] = u''
1085                     # Finally add the InsetLaTeXAccent
1086                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1087                     break
1088             elif j > 0 and document.body[i][j] in inverse_accent_map:
1089                 accented_char = document.body[i][j-1]
1090                 if accented_char == ' ':
1091                     # Conform to LyX output
1092                     accented_char = ''
1093                 elif accented_char in inverse_accented_map:
1094                     accented_char = inverse_accented_map[accented_char]
1095                 accent = document.body[i][j]
1096                 try:
1097                     dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1098                 except UnicodeEncodeError:
1099                     # Insert the rest of the line as new line
1100                     if j < len(document.body[i]) - 1:
1101                         document.body.insert(i+1, document.body[i][j+1:])
1102                     # Delete the accented characters
1103                     if j > 1:
1104                         document.body[i] = document.body[i][:j-2]
1105                     else:
1106                         document.body[i] = u''
1107                     # Finally add the InsetLaTeXAccent
1108                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1109                     break
1110         i = i + 1
1111
1112     # Normalize to "Normal form C" (NFC, pre-composed characters) again
1113     for i in range(numberoflines):
1114         document.body[i] = unicodedata.normalize("NFC", document.body[i])
1115
1116
1117 def normalize_font_whitespace_259(document):
1118     """ Before format 259 the font changes were ignored if a
1119     whitespace was the first or last character in the sequence, this function
1120     transfers the whitespace outside."""
1121
1122     char_properties = {"\\series": "default",
1123                        "\\emph": "default",
1124                        "\\color": "none",
1125                        "\\shape": "default",
1126                        "\\bar": "default",
1127                        "\\family": "default"}
1128     return normalize_font_whitespace(document, char_properties)
1129
1130 def normalize_font_whitespace_274(document):
1131     """ Before format 259 (sic) the font changes were ignored if a
1132     whitespace was the first or last character in the sequence. This was
1133     corrected for most font properties in format 259, but the language
1134     was forgotten then. This function applies the same conversion done
1135     there (namely, transfers the whitespace outside) for font language
1136     changes, as well."""
1137
1138     char_properties = {"\\lang": "default"}
1139     return normalize_font_whitespace(document, char_properties)
1140
1141 def get_paragraph_language(document, i):
1142     """ Return the language of the paragraph in which line i of the document
1143     body is. If the first thing in the paragraph is a \\lang command, that
1144     is the paragraph's langauge; otherwise, the paragraph's language is the
1145     document's language."""
1146
1147     lines = document.body
1148
1149     first_nonempty_line = \
1150         find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
1151
1152     words = lines[first_nonempty_line].split()
1153
1154     if len(words) > 1 and words[0] == "\\lang":
1155         return words[1]
1156     else:
1157         return document.language
1158
1159 def normalize_font_whitespace(document, char_properties):
1160     """ Before format 259 the font changes were ignored if a
1161     whitespace was the first or last character in the sequence, this function
1162     transfers the whitespace outside. Only a change in one of the properties
1163     in the provided     char_properties is handled by this function."""
1164
1165     if document.backend != "latex":
1166         return
1167
1168     lines = document.body
1169
1170     changes = {}
1171
1172     i = 0
1173     while i < len(lines):
1174         words = lines[i].split()
1175
1176         if len(words) > 0 and words[0] == "\\begin_layout":
1177             # a new paragraph resets all font changes
1178             changes.clear()
1179             # also reset the default language to be the paragraph's language
1180             if "\\lang" in char_properties.keys():
1181                 char_properties["\\lang"] = \
1182                     get_paragraph_language(document, i + 1)
1183
1184         elif len(words) > 1 and words[0] in char_properties.keys():
1185             # we have a font change
1186             if char_properties[words[0]] == words[1]:
1187                 # property gets reset
1188                 if words[0] in changes.keys():
1189                     del changes[words[0]]
1190                 defaultproperty = True
1191             else:
1192                 # property gets set
1193                 changes[words[0]] = words[1]
1194                 defaultproperty = False
1195
1196             # We need to explicitly reset all changed properties if we find
1197             # a space below, because LyX 1.4 would output the space after
1198             # closing the previous change and before starting the new one,
1199             # and closing a font change means to close all properties, not
1200             # just the changed one.
1201
1202             if lines[i-1] and lines[i-1][-1] == " ":
1203                 lines[i-1] = lines[i-1][:-1]
1204                 # a space before the font change
1205                 added_lines = [" "]
1206                 for k in changes.keys():
1207                     # exclude property k because that is already in lines[i]
1208                     if k != words[0]:
1209                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1210                 for k in changes.keys():
1211                     # exclude property k because that must be added below anyway
1212                     if k != words[0]:
1213                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1214                 if defaultproperty:
1215                     # Property is reset in lines[i], so add the new stuff afterwards
1216                     lines[i+1:i+1] = added_lines
1217                 else:
1218                     # Reset property for the space
1219                     added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1220                     lines[i:i] = added_lines
1221                 i = i + len(added_lines)
1222
1223             elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1224                 # a space after the font change
1225                 if (lines[i+1] == " " and lines[i+2]):
1226                     next_words = lines[i+2].split()
1227                     if len(next_words) > 0 and next_words[0] == words[0]:
1228                         # a single blank with a property different from the
1229                         # previous and the next line must not be changed
1230                         i = i + 2
1231                         continue
1232                 lines[i+1] = lines[i+1][1:]
1233                 added_lines = [" "]
1234                 for k in changes.keys():
1235                     # exclude property k because that is already in lines[i]
1236                     if k != words[0]:
1237                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1238                 for k in changes.keys():
1239                     # exclude property k because that must be added below anyway
1240                     if k != words[0]:
1241                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1242                 # Reset property for the space
1243                 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1244                 lines[i:i] = added_lines
1245                 i = i + len(added_lines)
1246
1247         i = i + 1
1248
1249
1250 def revert_utf8x(document):
1251     " Set utf8x encoding to utf8. "
1252     i = find_token(document.header, "\\inputencoding", 0)
1253     if i == -1:
1254         document.header.append("\\inputencoding auto")
1255     else:
1256         inputenc = get_value(document.header, "\\inputencoding", i)
1257         if inputenc == "utf8x":
1258             document.header[i] = "\\inputencoding utf8"
1259     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1260
1261
1262 def revert_utf8plain(document):
1263     " Set utf8plain encoding to utf8. "
1264     i = find_token(document.header, "\\inputencoding", 0)
1265     if i == -1:
1266         document.header.append("\\inputencoding auto")
1267     else:
1268         inputenc = get_value(document.header, "\\inputencoding", i)
1269         if inputenc == "utf8-plain":
1270             document.header[i] = "\\inputencoding utf8"
1271     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1272
1273
1274 def revert_beamer_alert(document):
1275     " Revert beamer's \\alert inset back to ERT. "
1276     i = 0
1277     while 1:
1278         i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1279         if i == -1:
1280             return
1281         document.body[i] = "\\begin_inset ERT"
1282         i = i + 1
1283         while 1:
1284             if (document.body[i][:13] == "\\begin_layout"):
1285                 # Insert the \alert command
1286                 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
1287                 break
1288             i = i + 1
1289
1290         i = i + 1
1291
1292
1293 def revert_beamer_structure(document):
1294     " Revert beamer's \\structure inset back to ERT. "
1295     i = 0
1296     while 1:
1297         i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1298         if i == -1:
1299             return
1300         document.body[i] = "\\begin_inset ERT"
1301         i = i + 1
1302         while 1:
1303             if (document.body[i][:13] == "\\begin_layout"):
1304                 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
1305                 break
1306             i = i + 1
1307
1308         i = i + 1
1309
1310
1311 def convert_changes(document):
1312     " Switch output_changes off if tracking_changes is off. "
1313     i = find_token(document.header, '\\tracking_changes', 0)
1314     if i == -1:
1315         document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
1316         return
1317     j = find_token(document.header, '\\output_changes', 0)
1318     if j == -1:
1319         document.warning("Malformed lyx document: Missing '\\output_changes'.")
1320         return
1321     tracking_changes = get_value(document.header, "\\tracking_changes", i)
1322     output_changes = get_value(document.header, "\\output_changes", j)
1323     if tracking_changes == "false" and output_changes == "true":
1324         document.header[j] = "\\output_changes false"
1325
1326
1327 def revert_ascii(document):
1328     " Set ascii encoding to auto. "
1329     i = find_token(document.header, "\\inputencoding", 0)
1330     if i == -1:
1331         document.header.append("\\inputencoding auto")
1332     else:
1333         inputenc = get_value(document.header, "\\inputencoding", i)
1334         if inputenc == "ascii":
1335             document.header[i] = "\\inputencoding auto"
1336     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1337
1338
1339 def normalize_language_name(document):
1340     lang = { "brazil": "brazilian",
1341              "portuges": "portuguese"}
1342
1343     if document.language in lang:
1344         document.language = lang[document.language]
1345         i = find_token(document.header, "\\language", 0)
1346         document.header[i] = "\\language %s" % document.language
1347
1348
1349 def revert_language_name(document):
1350     lang = { "brazilian": "brazil",
1351              "portuguese": "portuges"}
1352
1353     if document.language in lang:
1354         document.language = lang[document.language]
1355         i = find_token(document.header, "\\language", 0)
1356         document.header[i] = "\\language %s" % document.language
1357
1358 #
1359 #  \textclass cv -> \textclass simplecv
1360 def convert_cv_textclass(document):
1361     if document.textclass == "cv":
1362         document.textclass = "simplecv"
1363
1364
1365 def revert_cv_textclass(document):
1366     if document.textclass == "simplecv":
1367         document.textclass = "cv"
1368
1369
1370 #
1371 # add scaleBeforeRotation graphics param
1372 def convert_graphics_rotation(document):
1373     " add scaleBeforeRotation graphics parameter. "
1374     i = 0
1375     while 1:
1376         i = find_token(document.body, "\\begin_inset Graphics", i)
1377         if i == -1:
1378             return
1379         j = find_end_of_inset(document.body, i+1)
1380         if j == -1:
1381             # should not happen
1382             document.warning("Malformed LyX document: Could not find end of graphics inset.")
1383         # Seach for rotateAngle and width or height or scale
1384         # If these params are not there, nothing needs to be done.
1385         k = find_token(document.body, "\trotateAngle", i + 1, j)
1386         l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1387         if (k != -1 and l != -1):
1388             document.body.insert(j, 'scaleBeforeRotation')
1389         i = i + 1
1390
1391
1392 #
1393 # remove scaleBeforeRotation graphics param
1394 def revert_graphics_rotation(document):
1395     " remove scaleBeforeRotation graphics parameter. "
1396     i = 0
1397     while 1:
1398         i = find_token(document.body, "\\begin_inset Graphics", i)
1399         if i == -1:
1400             return
1401         j = find_end_of_inset(document.body, i + 1)
1402         if j == -1:
1403             # should not happen
1404             document.warning("Malformed LyX document: Could not find end of graphics inset.")
1405         # If there's a scaleBeforeRotation param, just remove that
1406         k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1407         if k != -1:
1408             del document.body[k]
1409         else:
1410             # if not, and if we have rotateAngle and width or height or scale,
1411             # we have to put the rotateAngle value to special
1412             rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1413             special = get_value(document.body, 'special', i + 1, j)
1414             if rotateAngle != "":
1415                 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1416                 if k == -1:
1417                     break
1418                 if special == "":
1419                     document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
1420                 else:
1421                     l = find_token(document.body, "\tspecial", i + 1, j)
1422                     document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1423                 k = find_token(document.body, "\trotateAngle", i + 1, j)
1424                 if k != -1:
1425                     del document.body[k]
1426         i = i + 1
1427
1428
1429
1430 def convert_tableborder(document):
1431     # The problematic is: LyX double the table cell border as it ignores the "|" character in
1432     # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1433     i = 0
1434     while i < len(document.body):
1435         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1436         k = document.body[i].find("|>{", 0, len(document.body[i]))
1437         # the two tokens have to be in one line
1438         if (h != -1 and k != -1):
1439             # delete the "|"
1440             document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
1441         i = i + 1
1442
1443
1444 def revert_tableborder(document):
1445     i = 0
1446     while i < len(document.body):
1447         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1448         k = document.body[i].find(">{", 0, len(document.body[i]))
1449         # the two tokens have to be in one line
1450         if (h != -1 and k != -1):
1451             # add the "|"
1452             document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
1453         i = i + 1
1454
1455
1456 def revert_armenian(document):
1457
1458     # set inputencoding from armscii8 to auto
1459     if document.inputencoding == "armscii8":
1460         i = find_token(document.header, "\\inputencoding", 0)
1461         if i != -1:
1462             document.header[i] = "\\inputencoding auto"
1463     # check if preamble exists, if not k is set to -1
1464     i = 0
1465     k = -1
1466     while i < len(document.preamble):
1467         if k == -1:
1468             k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1469         if k == -1:
1470             k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1471         i = i + 1
1472     # add the entry \usepackage{armtex} to the document preamble
1473     if document.language == "armenian":
1474         # set the armtex entry as the first preamble line
1475         if k != -1:
1476             document.preamble[0:0] = ["\\usepackage{armtex}"]
1477         # create the preamble when it doesn't exist
1478         else:
1479             document.preamble.append('\\usepackage{armtex}')
1480     # Set document language from armenian to english
1481     if document.language == "armenian":
1482         document.language = "english"
1483         i = find_token(document.header, "\\language", 0)
1484         if i != -1:
1485             document.header[i] = "\\language english"
1486
1487
1488 def revert_CJK(document):
1489     " Set CJK encodings to default and languages chinese, japanese and korean to english. "
1490     encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
1491                  "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
1492     i = find_token(document.header, "\\inputencoding", 0)
1493     if i == -1:
1494         document.header.append("\\inputencoding auto")
1495     else:
1496         inputenc = get_value(document.header, "\\inputencoding", i)
1497         if inputenc in encodings:
1498             document.header[i] = "\\inputencoding default"
1499     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1500
1501     if document.language == "chinese-simplified" or \
1502        document.language == "chinese-traditional" or \
1503        document.language == "japanese" or document.language == "korean":
1504         document.language = "english"
1505         i = find_token(document.header, "\\language", 0)
1506         if i != -1:
1507             document.header[i] = "\\language english"
1508
1509
1510 def revert_preamble_listings_params(document):
1511     " Revert preamble option \listings_params "
1512     i = find_token(document.header, "\\listings_params", 0)
1513     if i != -1:
1514         document.preamble.append('\\usepackage{listings}')
1515         document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
1516         document.header.pop(i);
1517
1518
1519 def revert_listings_inset(document):
1520     r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1521 FROM
1522
1523 \begin_inset
1524 lstparams "language=Delphi"
1525 inline true
1526 status open
1527
1528 \begin_layout Standard
1529 var i = 10;
1530 \end_layout
1531
1532 \end_inset
1533
1534 TO
1535
1536 \begin_inset ERT
1537 status open
1538 \begin_layout Standard
1539
1540
1541 \backslash
1542 lstinline[language=Delphi]{var i = 10;}
1543 \end_layout
1544
1545 \end_inset
1546
1547 There can be an caption inset in this inset
1548
1549 \begin_layout Standard
1550 \begin_inset Caption
1551
1552 \begin_layout Standard
1553 before label
1554 \begin_inset LatexCommand label
1555 name "lst:caption"
1556
1557 \end_inset
1558
1559 after label
1560 \end_layout
1561
1562 \end_inset
1563
1564
1565 \end_layout
1566
1567 '''
1568     i = 0
1569     while True:
1570         i = find_token(document.body, '\\begin_inset listings', i)
1571         if i == -1:
1572             break
1573         else:
1574             if not '\\usepackage{listings}' in document.preamble:
1575                 document.preamble.append('\\usepackage{listings}')
1576         j = find_end_of_inset(document.body, i + 1)
1577         if j == -1:
1578             # this should not happen
1579             break
1580         inline = 'false'
1581         params = ''
1582         status = 'open'
1583         # first three lines
1584         for line in range(i + 1, i + 4):
1585             if document.body[line].startswith('inline'):
1586                 inline = document.body[line].split()[1]
1587             if document.body[line].startswith('lstparams'):
1588                 params = document.body[line].split()[1].strip('"')
1589             if document.body[line].startswith('status'):
1590                 status = document.body[line].split()[1].strip()
1591                 k = line + 1
1592         # caption?
1593         caption = ''
1594         label = ''
1595         cap = find_token(document.body, '\\begin_inset Caption', i)
1596         if cap != -1:
1597             cap_end = find_end_of_inset(document.body, cap + 1)
1598             if cap_end == -1:
1599                 # this should not happen
1600                 break
1601             # label?
1602             lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1603             if lbl != -1:
1604                 lbl_end = find_end_of_inset(document.body, lbl + 1)
1605                 if lbl_end == -1:
1606                     # this should not happen
1607                     break
1608             else:
1609                 lbl = cap_end
1610                 lbl_end = cap_end
1611             for line in document.body[lbl : lbl_end + 1]:
1612                 if line.startswith('name '):
1613                     label = line.split()[1].strip('"')
1614                     break
1615             for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1616                 if not line.startswith('\\'):
1617                     caption += line.strip()
1618             k = cap_end + 1
1619         inlinecode = ''
1620         # looking for the oneline code for lstinline
1621         inlinecode = document.body[find_end_of_layout(document.body,
1622             find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
1623         if len(caption) > 0:
1624             if len(params) == 0:
1625                 params = 'caption={%s}' % caption
1626             else:
1627                 params += ',caption={%s}' % caption
1628         if len(label) > 0:
1629             if len(params) == 0:
1630                 params = 'label={%s}' % label
1631             else:
1632                 params += ',label={%s}' % label
1633         if len(params) > 0:
1634             params = '[%s]' % params
1635             params = params.replace('\\', '\\backslash\n')
1636         if inline == 'true':
1637             document.body[i:(j+1)] = [r'\begin_inset ERT',
1638                                       'status %s' % status,
1639                                       r'\begin_layout %s' % document.default_layout,
1640                                       '',
1641                                       '',
1642                                       r'\backslash',
1643                                       'lstinline%s{%s}' % (params, inlinecode),
1644                                       r'\end_layout',
1645                                       '',
1646                                       r'\end_inset']
1647         else:
1648             document.body[i: j+1] =  [r'\begin_inset ERT',
1649                                       'status %s' % status,
1650                                       '',
1651                                       r'\begin_layout %s' % document.default_layout,
1652                                       '',
1653                                       '',
1654                                       r'\backslash',
1655                                       r'begin{lstlisting}%s' % params,
1656                                       r'\end_layout'
1657                                     ] + document.body[k : j - 1] + \
1658                                      ['',
1659                                       r'\begin_layout %s' % document.default_layout,
1660                                       '',
1661                                       r'\backslash',
1662                                       'end{lstlisting}',
1663                                       r'\end_layout',
1664                                       '',
1665                                       r'\end_inset']
1666
1667
1668 def revert_include_listings(document):
1669     r''' Revert lstinputlisting Include option , translate
1670 \begin_inset Include \lstinputlisting{file}[opt]
1671 preview false
1672
1673 \end_inset
1674
1675 TO
1676
1677 \begin_inset ERT
1678 status open
1679
1680 \begin_layout Standard
1681
1682
1683 \backslash
1684 lstinputlisting{file}[opt]
1685 \end_layout
1686
1687 \end_inset
1688     '''
1689
1690     i = 0
1691     while True:
1692         i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1693         if i == -1:
1694             break
1695         else:
1696             if not '\\usepackage{listings}' in document.preamble:
1697                 document.preamble.append('\\usepackage{listings}')
1698         j = find_end_of_inset(document.body, i + 1)
1699         if j == -1:
1700             # this should not happen
1701             break
1702         # find command line lstinputlisting{file}[options]
1703         cmd, file, option = '', '', ''
1704         if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1705             cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
1706         option = option.replace('\\', '\\backslash\n')
1707         document.body[i : j + 1] = [r'\begin_inset ERT',
1708                                     'status open',
1709                                     '',
1710                                     r'\begin_layout %s' % document.default_layout,
1711                                     '',
1712                                     '',
1713                                     r'\backslash',
1714                                     '%s%s{%s}' % (cmd, option, file),
1715                                     r'\end_layout',
1716                                     '',
1717                                     r'\end_inset']
1718
1719
1720 def revert_ext_font_sizes(document):
1721     if document.backend != "latex": return
1722     if not document.textclass.startswith("ext"): return
1723
1724     fontsize = get_value(document.header, '\\paperfontsize', 0)
1725     if fontsize not in ('10', '11', '12'): return
1726     fontsize += 'pt'
1727
1728     i = find_token(document.header, '\\paperfontsize', 0)
1729     document.header[i] = '\\paperfontsize default'
1730
1731     i = find_token(document.header, '\\options', 0)
1732     if i == -1:
1733         i = find_token(document.header, '\\textclass', 0) + 1
1734         document.header[i:i] = ['\\options %s' % fontsize]
1735     else:
1736         document.header[i] += ',%s' % fontsize
1737
1738
1739 def convert_ext_font_sizes(document):
1740     if document.backend != "latex": return
1741     if not document.textclass.startswith("ext"): return
1742
1743     fontsize = get_value(document.header, '\\paperfontsize', 0)
1744     if fontsize != 'default': return
1745
1746     i = find_token(document.header, '\\options', 0)
1747     if i == -1: return
1748
1749     options = get_value(document.header, '\\options', i)
1750
1751     fontsizes = '10pt', '11pt', '12pt'
1752     for fs in fontsizes:
1753         if options.find(fs) != -1:
1754             break
1755     else: # this else will only be attained if the for cycle had no match
1756         return
1757
1758     options = options.split(',')
1759     for j, opt in enumerate(options):
1760         if opt in fontsizes:
1761             fontsize = opt[:-2]
1762             del options[j]
1763             break
1764     else:
1765         return
1766
1767     k = find_token(document.header, '\\paperfontsize', 0)
1768     document.header[k] = '\\paperfontsize %s' % fontsize
1769
1770     if options:
1771         document.header[i] = '\\options %s' % ','.join(options)
1772     else:
1773         del document.header[i]
1774
1775
1776 def revert_separator_layout(document):
1777     r'''Revert --Separator-- to a lyx note
1778 From
1779
1780 \begin_layout --Separator--
1781 something
1782 \end_layout
1783
1784 to
1785
1786 \begin_layout Standard
1787 \begin_inset Note Note
1788 status open
1789
1790 \begin_layout Standard
1791 Separate Evironment
1792 \end_layout
1793
1794 \end_inset
1795 something
1796
1797 \end_layout
1798
1799     '''
1800
1801     i = 0
1802     while True:
1803         i = find_token(document.body, r'\begin_layout --Separator--', i)
1804         if i == -1:
1805             break
1806         j = find_end_of_layout(document.body, i + 1)
1807         if j == -1:
1808             # this should not happen
1809             break
1810         document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1811                                     r'\begin_inset Note Note',
1812                                     'status open',
1813                                     '',
1814                                     r'\begin_layout %s' % document.default_layout,
1815                                     'Separate Environment',
1816                                     r'\end_layout',
1817                                     '',
1818                                     r'\end_inset'] + \
1819                                     document.body[ i + 1 : j] + \
1820                                     ['',
1821                                     r'\end_layout'
1822                                     ]
1823
1824
1825 def convert_arabic (document):
1826     if document.language == "arabic":
1827         document.language = "arabic_arabtex"
1828         i = find_token(document.header, "\\language", 0)
1829         if i != -1:
1830             document.header[i] = "\\language arabic_arabtex"
1831     i = 0
1832     while i < len(document.body):
1833         h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
1834         if (h != -1):
1835             # change the language name
1836             document.body[i] = '\lang arabic_arabtex'
1837         i = i + 1
1838
1839
1840 def revert_arabic (document):
1841     if document.language == "arabic_arabtex":
1842         document.language = "arabic"
1843         i = find_token(document.header, "\\language", 0)
1844         if i != -1:
1845             document.header[i] = "\\language arabic"
1846     i = 0
1847     while i < len(document.body):
1848         h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
1849         if (h != -1):
1850             # change the language name
1851             document.body[i] = '\lang arabic'
1852         i = i + 1
1853
1854
1855 def read_unicodesymbols():
1856     " Read the unicodesymbols list of unicode characters and corresponding commands."
1857     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
1858     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
1859     spec_chars = {}
1860     for line in fp.readlines():
1861         if line[0] != '#':
1862             line=line.replace(' "',' ') # remove all quotation marks with spaces before
1863             line=line.replace('" ',' ') # remove all quotation marks with spaces after
1864             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
1865             try:
1866                 # flag1 and flag2 are preamble and other flags
1867                 [ucs4,command,flag1,flag2] =line.split(None,3)
1868                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
1869             except:
1870                 pass
1871     fp.close()
1872
1873     return spec_chars
1874
1875
1876 def revert_unicode(document):
1877     '''Transform unicode characters that can not be written using the
1878 document encoding to commands according to the unicodesymbols
1879 file. Characters that can not be replaced by commands are replaced by
1880 an replacement string.  Flags other than 'combined' are currently not
1881 implemented.'''
1882
1883     replacement_character = '???'
1884     spec_chars = read_unicodesymbols()
1885
1886     # Define strings to start and end ERT and math insets
1887     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash' % document.default_layout
1888     ert_outro='\n\\end_layout\n\n\\end_inset\n'
1889     math_intro='\n\\begin_inset Formula $'
1890     math_outro='$\n\\end_inset'
1891     # Find unicode characters and replace them
1892     in_ert = False # flag set to 1 if in ERT inset
1893     in_math = False # flag set to 1 if in math inset
1894     insets = [] # list of active insets
1895
1896     # Go through the file to capture all combining characters
1897     last_char = '' # to store the previous character
1898
1899     i = 0
1900     while i < len(document.body):
1901         line = document.body[i]
1902         # Check for insets
1903         if line.find('\\begin_inset') > -1:
1904             # check which inset to start
1905             if line.find('\\begin_inset ERT') > -1:
1906                 in_ert = True
1907                 insets.append('ert')
1908             elif line.find('\\begin_inset Formula') > -1:
1909                 in_math = True
1910                 insets.append('math')
1911             else:
1912                 insets.append('other')
1913         if line.find('\\end_inset') > -1:
1914             # check which inset to end
1915             try:
1916                 cur_inset = insets.pop()
1917                 if cur_inset == 'ert':
1918                     in_ert = False
1919                 elif cur_inset == 'math':
1920                     in_math = False
1921                 else:
1922                     pass # end of other inset
1923             except:
1924                 pass # inset list was empty (for some reason)
1925
1926         # Try to write the line
1927         try:
1928             # If all goes well the line is written here
1929             dummy = line.encode(document.encoding)
1930             last_char = line[-1]
1931             i += 1
1932         except:
1933             # Error, some character(s) in the line need to be replaced
1934             mod_line = u''
1935             for character in line:
1936                 try:
1937                     # Try to write the character
1938                     dummy = character.encode(document.encoding)
1939                     mod_line += character
1940                     last_char = character
1941                 except:
1942                     # Try to replace with ERT/math inset
1943                     if spec_chars.has_key(character):
1944                         command = spec_chars[character][0] # the command to replace unicode
1945                         flag1 = spec_chars[character][1]
1946                         flag2 = spec_chars[character][2]
1947                         if flag1.find('combining') > -1 or flag2.find('combining') > -1:
1948                             # We have a character that should be combined with the previous
1949                             command += '{' + last_char + '}'
1950                             # Remove the last character. Ignore if it is whitespace
1951                             if len(last_char.rstrip()):
1952                                 # last_char was found and is not whitespace
1953                                 if mod_line:
1954                                     mod_line = mod_line[:-1]
1955                                 else: # last_char belongs to the last line
1956                                     document.body[i-1] = document.body[i-1][:-1]
1957                             else:
1958                                 # The last character was replaced by a command. For now it is
1959                                 # ignored. This could be handled better.
1960                                 pass
1961                         if command[0:2] == '\\\\':
1962                             if command[2:12]=='ensuremath':
1963                                 if in_ert:
1964                                     # math in ERT
1965                                     command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash')
1966                                     command = command.replace('}', '$\n')
1967                                 elif not in_math:
1968                                     # add a math inset with the replacement character
1969                                     command = command.replace('\\\\ensuremath{\\', math_intro)
1970                                     command = command.replace('}', math_outro)
1971                                 else:
1972                                     # we are already in a math inset
1973                                     command = command.replace('\\\\ensuremath{\\', '')
1974                                     command = command.replace('}', '')
1975                             else:
1976                                 if in_math:
1977                                     # avoid putting an ERT in a math; instead put command as text
1978                                     command = command.replace('\\\\', '\mathrm{')
1979                                     command = command + '}'
1980                                 elif not in_ert:
1981                                     # add an ERT inset with the replacement character
1982                                     command = command.replace('\\\\', ert_intro)
1983                                     command = command + ert_outro
1984                                 else:
1985                                     command = command.replace('\\\\', '\n\\backslash')
1986                             last_char = '' # indicate that the character should not be removed
1987                         mod_line += command
1988                     else:
1989                         # Replace with replacement string
1990                         mod_line += replacement_character
1991             document.body[i:i+1] = mod_line.split('\n')
1992             i += len(mod_line.split('\n'))
1993
1994
1995 ##
1996 # Conversion hub
1997 #
1998
1999 supported_versions = ["1.5.0","1.5"]
2000 convert = [[246, []],
2001            [247, [convert_font_settings]],
2002            [248, []],
2003            [249, [convert_utf8]],
2004            [250, []],
2005            [251, []],
2006            [252, [convert_commandparams, convert_bibitem]],
2007            [253, []],
2008            [254, [convert_esint]],
2009            [255, []],
2010            [256, []],
2011            [257, [convert_caption]],
2012            [258, [convert_lyxline]],
2013            [259, [convert_accent, normalize_font_whitespace_259]],
2014            [260, []],
2015            [261, [convert_changes]],
2016            [262, []],
2017            [263, [normalize_language_name]],
2018            [264, [convert_cv_textclass]],
2019            [265, [convert_tableborder]],
2020            [266, []],
2021            [267, []],
2022            [268, []],
2023            [269, []],
2024            [270, []],
2025            [271, [convert_ext_font_sizes]],
2026            [272, []],
2027            [273, []],
2028            [274, [normalize_font_whitespace_274]],
2029            [275, [convert_graphics_rotation]],
2030            [276, [convert_arabic]]
2031           ]
2032
2033 revert =  [
2034            [275, [revert_arabic]],
2035            [274, [revert_graphics_rotation]],
2036            [273, []],
2037            [272, [revert_separator_layout]],
2038            [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2039            [270, [revert_ext_font_sizes]],
2040            [269, [revert_beamer_alert, revert_beamer_structure]],
2041            [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2042            [267, [revert_CJK]],
2043            [266, [revert_utf8plain]],
2044            [265, [revert_armenian]],
2045            [264, [revert_tableborder]],
2046            [263, [revert_cv_textclass]],
2047            [262, [revert_language_name]],
2048            [261, [revert_ascii]],
2049            [260, []],
2050            [259, [revert_utf8x]],
2051            [258, []],
2052            [257, []],
2053            [256, [revert_caption]],
2054            [255, [revert_encodings]],
2055            [254, [revert_clearpage, revert_cleardoublepage]],
2056            [253, [revert_esint]],
2057            [252, [revert_nomenclature, revert_printnomenclature]],
2058            [251, [revert_commandparams]],
2059            [250, [revert_cs_label]],
2060            [249, []],
2061            [248, [revert_accent, revert_utf8, revert_unicode]],
2062            [247, [revert_booktabs]],
2063            [246, [revert_font_settings]],
2064            [245, [revert_framed]]]
2065
2066
2067 if __name__ == "__main__":
2068     pass
2069
2070