lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
def find_end_of_inset(lines, i):
    """Find end of inset, where lines[i] is included.

    Thin wrapper: forwards to find_end_of() with the begin_inset/end_inset
    token pair and returns that helper's result unchanged.  Callers in this
    file compare the result against -1 to detect a missing end.
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
def find_end_of_layout(lines, i):
    """Find end of layout, where lines[i] is included.

    Thin wrapper: forwards to find_end_of() with the begin_layout/end_layout
    token pair and returns that helper's result unchanged.
    """
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
def find_beginning_of_layout(lines, i):
    """Find beginning of layout, where lines[i] is included.

    Thin wrapper: forwards to find_beginning_of() with the
    begin_layout/end_layout token pair and returns that helper's result
    unchanged.
    """
    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     if document.cjk_encoding != '':
 250         return
 251     encoding_stack = [document.encoding]
 252     lang_re = re.compile(r"^\\lang\s(\S+)")
 253     if document.inputencoding == "auto" or document.inputencoding == "default":
 254         for i in range(len(document.body)):
 255             result = lang_re.match(document.body[i])
 256             if result:
 257                 language = result.group(1)
 258                 if language == "default":
 259                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 260                     encoding_stack[-1] = document.encoding
 261                 else:
 262                     from lyx2lyx_lang import lang
 263                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 264                     encoding_stack[-1] = lang[language][3]
 265             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 266                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 267                 encoding_stack.append(encoding_stack[-1])
 268             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 269                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 270                 if len(encoding_stack) == 1:
 271                     # Don't remove the document encoding from the stack
 272                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 273                 else:
 274                     del encoding_stack[-1]
 275             if encoding_stack[-1] != document.encoding:
 276                 if forward:
 277                     # This line has been incorrectly interpreted as if it was
 278                     # encoded in 'encoding'.
 279                     # Convert back to the 8bit string that was in the file.
 280                     orig = document.body[i].encode(document.encoding)
 281                     # Convert the 8bit string that was in the file to unicode
 282                     # with the correct encoding.
 283                     document.body[i] = orig.decode(encoding_stack[-1])
 284                 else:
 285                     # Convert unicode to the 8bit string that will be written
 286                     # to the file with the correct encoding.
 287                     orig = document.body[i].encode(encoding_stack[-1])
 288                     # Convert the 8bit string that will be written to the
 289                     # file to fake unicode with the encoding that will later
 290                     # be used when writing to the file.
 291                     document.body[i] = orig.decode(document.encoding)
 292
 293
def convert_utf8(document):
    """Set document encoding to UTF-8.

    convert_multiencoding() is run first, in the forward direction, because
    it requires document.encoding to still hold the old value; only
    afterwards is document.encoding switched to "utf8".
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
 298
 299
 300 def revert_utf8(document):
 301     " Set document encoding to the value corresponding to inputencoding. "
 302     i = find_token(document.header, "\\inputencoding", 0)
 303     if i == -1:
 304         document.header.append("\\inputencoding auto")
 305     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 306         document.header[i] = "\\inputencoding auto"
 307     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 308     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 309     convert_multiencoding(document, False)
 310
 311
 312 def revert_cs_label(document):
 313     " Remove status flag of charstyle label. "
 314     i = 0
 315     while 1:
 316         i = find_token(document.body, "\\begin_inset CharStyle", i)
 317         if i == -1:
 318             return
 319         # Seach for a line starting 'show_label'
 320         # If it is not there, break with a warning message
 321         i = i + 1
 322         while 1:
 323             if (document.body[i][:10] == "show_label"):
 324                 del document.body[i]
 325                 break
 326             elif (document.body[i][:13] == "\\begin_layout"):
 327                 document.warning("Malformed LyX document: Missing 'show_label'.")
 328                 break
 329             i = i + 1
 330
 331         i = i + 1
 332
 333
 334 def convert_bibitem(document):
 335     """ Convert
 336 \bibitem [option]{argument}
 337
 338 to
 339
 340 \begin_inset LatexCommand bibitem
 341 label "option"
 342 key "argument"
 343
 344 \end_inset
 345
 346 This must be called after convert_commandparams.
 347 """
 348     i = 0
 349     while 1:
 350         i = find_token(document.body, "\\bibitem", i)
 351         if i == -1:
 352             break
 353         j = document.body[i].find('[') + 1
 354         k = document.body[i].rfind(']')
 355         if j == 0: # No optional argument found
 356             option = None
 357         else:
 358             option = document.body[i][j:k]
 359         j = document.body[i].rfind('{') + 1
 360         k = document.body[i].rfind('}')
 361         argument = document.body[i][j:k]
 362         lines = ['\\begin_inset LatexCommand bibitem']
 363         if option != None:
 364             lines.append('label "%s"' % option.replace('"', '\\"'))
 365         lines.append('key "%s"' % argument.replace('"', '\\"'))
 366         lines.append('')
 367         lines.append('\\end_inset')
 368         document.body[i:i+1] = lines
 369         i = i + 1
 370
 371
 372 commandparams_info = {
 373     # command : [option1, option2, argument]
 374     "bibitem" : ["label", "", "key"],
 375     "bibtex" : ["options", "btprint", "bibfiles"],
 376     "cite"        : ["after", "before", "key"],
 377     "citet"       : ["after", "before", "key"],
 378     "citep"       : ["after", "before", "key"],
 379     "citealt"     : ["after", "before", "key"],
 380     "citealp"     : ["after", "before", "key"],
 381     "citeauthor"  : ["after", "before", "key"],
 382     "citeyear"    : ["after", "before", "key"],
 383     "citeyearpar" : ["after", "before", "key"],
 384     "citet*"      : ["after", "before", "key"],
 385     "citep*"      : ["after", "before", "key"],
 386     "citealt*"    : ["after", "before", "key"],
 387     "citealp*"    : ["after", "before", "key"],
 388     "citeauthor*" : ["after", "before", "key"],
 389     "Citet"       : ["after", "before", "key"],
 390     "Citep"       : ["after", "before", "key"],
 391     "Citealt"     : ["after", "before", "key"],
 392     "Citealp"     : ["after", "before", "key"],
 393     "Citeauthor"  : ["after", "before", "key"],
 394     "Citet*"      : ["after", "before", "key"],
 395     "Citep*"      : ["after", "before", "key"],
 396     "Citealt*"    : ["after", "before", "key"],
 397     "Citealp*"    : ["after", "before", "key"],
 398     "Citeauthor*" : ["after", "before", "key"],
 399     "citefield"   : ["after", "before", "key"],
 400     "citetitle"   : ["after", "before", "key"],
 401     "cite*"       : ["after", "before", "key"],
 402     "hfill" : ["", "", ""],
 403     "index"      : ["", "", "name"],
 404     "printindex" : ["", "", "name"],
 405     "label" : ["", "", "name"],
 406     "eqref"     : ["name", "", "reference"],
 407     "pageref"   : ["name", "", "reference"],
 408     "prettyref" : ["name", "", "reference"],
 409     "ref"       : ["name", "", "reference"],
 410     "vpageref"  : ["name", "", "reference"],
 411     "vref"      : ["name", "", "reference"],
 412     "tableofcontents" : ["", "", "type"],
 413     "htmlurl" : ["name", "", "target"],
 414     "url"     : ["name", "", "target"]}
 415
 416
 417 def convert_commandparams(document):
 418     """ Convert
 419
 420  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 421  \end_inset
 422
 423  to
 424
 425  \begin_inset LatexCommand cmdname
 426  name1 "opt1"
 427  name2 "opt2"
 428  name3 "arg"
 429  \end_inset
 430
 431  name1, name2 and name3 can be different for each command.
 432 """
 433     # \begin_inset LatexCommand bibitem was not the official version (see
 434     # convert_bibitem()), but could be read in, so we convert it here, too.
 435
 436     i = 0
 437     while 1:
 438         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 439         if i == -1:
 440             break
 441         command = document.body[i][26:].strip()
 442         if command == "":
 443             document.warning("Malformed LyX document: Missing LatexCommand name.")
 444             i = i + 1
 445             continue
 446
 447         # The following parser is taken from the original InsetCommandParams::scanCommand
 448         name = ""
 449         option1 = ""
 450         option2 = ""
 451         argument = ""
 452         state = "WS"
 453         # Used to handle things like \command[foo[bar]]{foo{bar}}
 454         nestdepth = 0
 455         b = 0
 456         for c in command:
 457             if ((state == "CMDNAME" and c == ' ') or
 458                 (state == "CMDNAME" and c == '[') or
 459                 (state == "CMDNAME" and c == '{')):
 460                 state = "WS"
 461             if ((state == "OPTION" and c == ']') or
 462                 (state == "SECOPTION" and c == ']') or
 463                 (state == "CONTENT" and c == '}')):
 464                 if nestdepth == 0:
 465                     state = "WS"
 466                 else:
 467                     nestdepth = nestdepth - 1
 468             if ((state == "OPTION" and c == '[') or
 469                 (state == "SECOPTION" and c == '[') or
 470                 (state == "CONTENT" and c == '{')):
 471                 nestdepth = nestdepth + 1
 472             if state == "CMDNAME":
 473                     name += c
 474             elif state == "OPTION":
 475                     option1 += c
 476             elif state == "SECOPTION":
 477                     option2 += c
 478             elif state == "CONTENT":
 479                     argument += c
 480             elif state == "WS":
 481                 if c == '\\':
 482                     state = "CMDNAME"
 483                 elif c == '[' and b != ']':
 484                     state = "OPTION"
 485                     nestdepth = 0 # Just to be sure
 486                 elif c == '[' and b == ']':
 487                     state = "SECOPTION"
 488                     nestdepth = 0 # Just to be sure
 489                 elif c == '{':
 490                     state = "CONTENT"
 491                     nestdepth = 0 # Just to be sure
 492             b = c
 493
 494         # Now we have parsed the command, output the parameters
 495         lines = ["\\begin_inset LatexCommand %s" % name]
 496         if option1 != "":
 497             if commandparams_info[name][0] == "":
 498                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 499             else:
 500                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 501         if option2 != "":
 502             if commandparams_info[name][1] == "":
 503                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 504             else:
 505                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 506         if argument != "":
 507             if commandparams_info[name][2] == "":
 508                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 509             else:
 510                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 511         document.body[i:i+1] = lines
 512         i = i + 1
 513
 514
 515 def revert_commandparams(document):
 516     regex = re.compile(r'(\S+)\s+(.+)')
 517     i = 0
 518     while 1:
 519         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 520         if i == -1:
 521             break
 522         name = document.body[i].split()[2]
 523         j = find_end_of_inset(document.body, i + 1)
 524         preview_line = ""
 525         option1 = ""
 526         option2 = ""
 527         argument = ""
 528         for k in range(i + 1, j):
 529             match = re.match(regex, document.body[k])
 530             if match:
 531                 pname = match.group(1)
 532                 pvalue = match.group(2)
 533                 if pname == "preview":
 534                     preview_line = document.body[k]
 535                 elif (commandparams_info[name][0] != "" and
 536                       pname == commandparams_info[name][0]):
 537                     option1 = pvalue.strip('"').replace('\\"', '"')
 538                 elif (commandparams_info[name][1] != "" and
 539                       pname == commandparams_info[name][1]):
 540                     option2 = pvalue.strip('"').replace('\\"', '"')
 541                 elif (commandparams_info[name][2] != "" and
 542                       pname == commandparams_info[name][2]):
 543                     argument = pvalue.strip('"').replace('\\"', '"')
 544             elif document.body[k].strip() != "":
 545                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 546         if name == "bibitem":
 547             if option1 == "":
 548                 lines = ["\\bibitem {%s}" % argument]
 549             else:
 550                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 551         else:
 552             if option1 == "":
 553                 if option2 == "":
 554                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 555                 else:
 556                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 557             else:
 558                 if option2 == "":
 559                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 560                 else:
 561                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 562         if name != "bibitem":
 563             if preview_line != "":
 564                 lines.append(preview_line)
 565             lines.append('')
 566             lines.append('\\end_inset')
 567         document.body[i:j+1] = lines
 568         i = j + 1
 569
 570
 571 def revert_nomenclature(document):
 572     " Convert nomenclature entry to ERT. "
 573     regex = re.compile(r'(\S+)\s+(.+)')
 574     i = 0
 575     use_nomencl = 0
 576     while 1:
 577         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 578         if i == -1:
 579             break
 580         use_nomencl = 1
 581         j = find_end_of_inset(document.body, i + 1)
 582         preview_line = ""
 583         symbol = ""
 584         description = ""
 585         prefix = ""
 586         for k in range(i + 1, j):
 587             match = re.match(regex, document.body[k])
 588             if match:
 589                 name = match.group(1)
 590                 value = match.group(2)
 591                 if name == "preview":
 592                     preview_line = document.body[k]
 593                 elif name == "symbol":
 594                     symbol = value.strip('"').replace('\\"', '"')
 595                 elif name == "description":
 596                     description = value.strip('"').replace('\\"', '"')
 597                 elif name == "prefix":
 598                     prefix = value.strip('"').replace('\\"', '"')
 599             elif document.body[k].strip() != "":
 600                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 601         if prefix == "":
 602             command = 'nomenclature{%s}{%s}' % (symbol, description)
 603         else:
 604             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 605         document.body[i:j+1] = ['\\begin_inset ERT',
 606                                 'status collapsed',
 607                                 '',
 608                                 '\\begin_layout %s' % document.default_layout,
 609                                 '',
 610                                 '',
 611                                 '\\backslash',
 612                                 command,
 613                                 '\\end_layout',
 614                                 '',
 615                                 '\\end_inset']
 616         i = i + 11
 617     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 618         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 619         document.preamble.append('\\makenomenclature')
 620
 621
 622 def revert_printnomenclature(document):
 623     " Convert printnomenclature to ERT. "
 624     regex = re.compile(r'(\S+)\s+(.+)')
 625     i = 0
 626     use_nomencl = 0
 627     while 1:
 628         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 629         if i == -1:
 630             break
 631         use_nomencl = 1
 632         j = find_end_of_inset(document.body, i + 1)
 633         preview_line = ""
 634         labelwidth = ""
 635         for k in range(i + 1, j):
 636             match = re.match(regex, document.body[k])
 637             if match:
 638                 name = match.group(1)
 639                 value = match.group(2)
 640                 if name == "preview":
 641                     preview_line = document.body[k]
 642                 elif name == "labelwidth":
 643                     labelwidth = value.strip('"').replace('\\"', '"')
 644             elif document.body[k].strip() != "":
 645                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 646         if labelwidth == "":
 647             command = 'nomenclature{}'
 648         else:
 649             command = 'nomenclature[%s]' % labelwidth
 650         document.body[i:j+1] = ['\\begin_inset ERT',
 651                                 'status collapsed',
 652                                 '',
 653                                 '\\begin_layout %s' % document.default_layout,
 654                                 '',
 655                                 '',
 656                                 '\\backslash',
 657                                 command,
 658                                 '\\end_layout',
 659                                 '',
 660                                 '\\end_inset']
 661         i = i + 11
 662     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 663         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 664         document.preamble.append('\\makenomenclature')
 665
 666
 667 def convert_esint(document):
 668     " Add \\use_esint setting to header. "
 669     i = find_token(document.header, "\\cite_engine", 0)
 670     if i == -1:
 671         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 672         return
 673     # 0 is off, 1 is auto, 2 is on.
 674     document.header.insert(i, '\\use_esint 0')
 675
 676
 677 def revert_esint(document):
 678     " Remove \\use_esint setting from header. "
 679     i = find_token(document.header, "\\use_esint", 0)
 680     if i == -1:
 681         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 682         return
 683     use_esint = document.header[i].split()[1]
 684     del document.header[i]
 685     # 0 is off, 1 is auto, 2 is on.
 686     if (use_esint == 2):
 687         document.preamble.append('\\usepackage{esint}')
 688
 689
 690 def revert_clearpage(document):
 691     " clearpage -> ERT "
 692     i = 0
 693     while 1:
 694         i = find_token(document.body, "\\clearpage", i)
 695         if i == -1:
 696             break
 697         document.body[i:i+1] =  ['\\begin_inset ERT',
 698                                 'status collapsed',
 699                                 '',
 700                                 '\\begin_layout %s' % document.default_layout,
 701                                 '',
 702                                 '',
 703                                 '\\backslash',
 704                                 'clearpage',
 705                                 '\\end_layout',
 706                                 '',
 707                                 '\\end_inset']
 708     i = i + 1
 709
 710
 711 def revert_cleardoublepage(document):
 712     " cleardoublepage -> ERT "
 713     i = 0
 714     while 1:
 715         i = find_token(document.body, "\\cleardoublepage", i)
 716         if i == -1:
 717             break
 718         document.body[i:i+1] =  ['\\begin_inset ERT',
 719                                 'status collapsed',
 720                                 '',
 721                                 '\\begin_layout %s' % document.default_layout,
 722                                 '',
 723                                 '',
 724                                 '\\backslash',
 725                                 'cleardoublepage',
 726                                 '\\end_layout',
 727                                 '',
 728                                 '\\end_inset']
 729     i = i + 1
 730
 731
 732 def convert_lyxline(document):
 733     " remove fontsize commands for \lyxline "
 734     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 735     # to change the line thickness. The new definiton does this so that imported
 736     # \lyxlines would have a different line thickness. The eventual fontsize command
 737     # before \lyxline is therefore removed to get the same output.
 738     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 739                  "large", "Large", "LARGE", "huge", "Huge"]
 740     for n in range(0, len(fontsizes)):
 741         i = 0
 742         k = 0
 743         while i < len(document.body):
 744             i = find_token(document.body, "\\size " + fontsizes[n], i)
 745             k = find_token(document.body, "\\lyxline", i)
 746             # the corresponding fontsize command is always 2 lines before the \lyxline
 747             if (i != -1 and k == i+2):
 748                 document.body[i:i+1] = []
 749             else:
 750                 break
 751         i = i + 1
 752
 753
 754 def revert_encodings(document):
 755     " Set new encodings to auto. "
 756     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 757                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 758                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 759     i = find_token(document.header, "\\inputencoding", 0)
 760     if i == -1:
 761         document.header.append("\\inputencoding auto")
 762     else:
 763         inputenc = get_value(document.header, "\\inputencoding", i)
 764         if inputenc in encodings:
 765             document.header[i] = "\\inputencoding auto"
 766     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 767
 768
 769 def convert_caption(document):
 770     " Convert caption layouts to caption insets. "
 771     i = 0
 772     while 1:
 773         i = find_token(document.body, "\\begin_layout Caption", i)
 774         if i == -1:
 775             return
 776         j = find_end_of_layout(document.body, i)
 777         if j == -1:
 778             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 779             return
 780
 781         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 782         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 783                             "\\begin_inset Caption", "",
 784                             "\\begin_layout %s" % document.default_layout]
 785         i = i + 1
 786
 787
 788 def revert_caption(document):
 789     " Convert caption insets to caption layouts. "
 790     " This assumes that the text class has a caption style. "
 791     i = 0
 792     while 1:
 793         i = find_token(document.body, "\\begin_inset Caption", i)
 794         if i == -1:
 795             return
 796
 797         # We either need to delete the previous \begin_layout line, or we
 798         # need to end the previous layout if this inset is not in the first
 799         # position of the paragraph.
 800         layout_before = find_token_backwards(document.body, "\\begin_layout", i)
 801         if layout_before == -1:
 802             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 803             return
 804         layout_line = document.body[layout_before]
 805         del_layout_before = True
 806         l = layout_before + 1
 807         while l < i:
 808             if document.body[l] != "":
 809                 del_layout_before = False
 810                 break
 811             l = l + 1
 812         if del_layout_before:
 813             del document.body[layout_before:i]
 814             i = layout_before
 815         else:
 816             document.body[i:i] = ["\\end_layout", ""]
 817             i = i + 2
 818
 819         # Find start of layout in the inset and end of inset
 820         j = find_token(document.body, "\\begin_layout", i)
 821         if j == -1:
 822             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 823             return
 824         k = find_end_of_inset(document.body, i)
 825         if k == -1:
 826             document.warning("Malformed LyX document: Missing `\\end_inset'.")
 827             return
 828
 829         # We either need to delete the following \end_layout line, or we need
 830         # to restart the old layout if this inset is not at the paragraph end.
 831         layout_after = find_token(document.body, "\\end_layout", k)
 832         if layout_after == -1:
 833             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 834             return
 835         del_layout_after = True
 836         l = k + 1
 837         while l < layout_after:
 838             if document.body[l] != "":
 839                 del_layout_after = False
 840                 break
 841             l = l + 1
 842         if del_layout_after:
 843             del document.body[k+1:layout_after+1]
 844         else:
 845             document.body[k+1:k+1] = [layout_line, ""]
 846
 847         # delete \begin_layout and \end_inset and replace \begin_inset with
 848         # "\begin_layout Caption". This works because we can only have one
 849         # paragraph in the caption inset: The old \end_layout will be recycled.
 850         del document.body[k]
 851         if document.body[k] == "":
 852             del document.body[k]
 853         del document.body[j]
 854         if document.body[j] == "":
 855             del document.body[j]
 856         document.body[i] = "\\begin_layout Caption"
 857         if document.body[i+1] == "":
 858             del document.body[i+1]
 859         i = i + 1
 860
 861
# Accents of InsetLaTeXAccent: maps the accent command (without the leading
# backslash) to the unicode combining character that is used for it.
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361'  # tie. This is special: It spans two characters, but
                     # only one is given as argument, so we don't need to
                     # treat it differently.
}
 883
 884
# special accents of InsetLaTeXAccent without argument: maps the command
# (without the leading backslash) to the unicode character it stands for.
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141'  # L with stroke
}
 892
 893
# special accent arguments of InsetLaTeXAccent: commands that may be given
# as the accented character, mapped to the unicode character they stand for.
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237'  # dotless j
}
 899
 900
 901 def _convert_accent(accent, accented_char):
 902     type = accent
 903     char = accented_char
 904     if char == '':
 905         if type in special_accent_map:
 906             return special_accent_map[type]
 907         # a missing char is treated as space by LyX
 908         char = ' '
 909     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
 910         # Special caron, only used with t, d, l and L.
 911         # It is not in the map because we convert it to the same unicode
 912         # character as the normal caron: \q{} is only defined if babel with
 913         # the czech or slovak language is used, and the normal caron
 914         # produces the correct output if the T1 font encoding is used.
 915         # For the same reason we never convert to \q{} in the other direction.
 916         type = 'v'
 917     elif char in accented_map:
 918         char = accented_map[char]
 919     elif (len(char) > 1):
 920         # We can only convert accents on a single char
 921         return ''
 922     a = accent_map.get(type)
 923     if a:
 924         return unicodedata.normalize("NFKC", "%s%s" % (char, a))
 925     return ''
 926
 927
 928 def convert_ertbackslash(body, i, ert, default_layout):
 929     r""" -------------------------------------------------------------------------------------------
 930     Convert backslashes and '\n' into valid ERT code, append the converted
 931     text to body[i] and return the (maybe incremented) line index i"""
 932
 933     for c in ert:
 934         if c == '\\':
 935             body[i] = body[i] + '\\backslash '
 936             i = i + 1
 937             body.insert(i, '')
 938         elif c == '\n':
 939             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
 940             i = i + 4
 941         else:
 942             body[i] = body[i] + c
 943     return i
 944
 945
 946 def convert_accent(document):
 947     # The following forms are supported by LyX:
 948     # '\i \"{a}' (standard form, as written by LyX)
 949     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
 950     # '\i \"{ }' (also accepted if the accented char is a space)
 951     # '\i \" a'  (also accepted)
 952     # '\i \"'    (also accepted)
 953     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
 954     re_contents = re.compile(r'^([^\s{]+)(.*)$')
 955     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
 956     i = 0
 957     while 1:
 958         i = find_re(document.body, re_wholeinset, i)
 959         if i == -1:
 960             return
 961         match = re_wholeinset.match(document.body[i])
 962         prefix = match.group(1)
 963         contents = match.group(3).strip()
 964         match = re_contents.match(contents)
 965         if match:
 966             # Strip first char (always \)
 967             accent = match.group(1)[1:]
 968             accented_contents = match.group(2).strip()
 969             match = re_accentedcontents.match(accented_contents)
 970             accented_char = match.group(1)
 971             converted = _convert_accent(accent, accented_char)
 972             if converted == '':
 973                 # Normalize contents
 974                 contents = '%s{%s}' % (accent, accented_char),
 975             else:
 976                 document.body[i] = '%s%s' % (prefix, converted)
 977                 i += 1
 978                 continue
 979         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
 980         document.body[i] = prefix
 981         document.body[i+1:i+1] = ['\\begin_inset ERT',
 982                                   'status collapsed',
 983                                   '',
 984                                   '\\begin_layout %s' % document.default_layout,
 985                                   '',
 986                                   '',
 987                                   '']
 988         i = convert_ertbackslash(document.body, i + 7,
 989                                  '\\%s' % contents,
 990                                  document.default_layout)
 991         document.body[i+1:i+1] = ['\\end_layout',
 992                                   '',
 993                                   '\\end_inset']
 994         i += 3
 995
 996
 997 def revert_accent(document):
 998     inverse_accent_map = {}
 999     for k in accent_map:
1000         inverse_accent_map[accent_map[k]] = k
1001     inverse_special_accent_map = {}
1002     for k in special_accent_map:
1003         inverse_special_accent_map[special_accent_map[k]] = k
1004     inverse_accented_map = {}
1005     for k in accented_map:
1006         inverse_accented_map[accented_map[k]] = k
1007
1008     # Since LyX may insert a line break within a word we must combine all
1009     # words before unicode normalization.
1010     # We do this only if the next line starts with an accent, otherwise we
1011     # would create things like '\begin_inset ERTstatus'.
1012     numberoflines = len(document.body)
1013     for i in range(numberoflines-1):
1014         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1015             continue
1016         if (document.body[i+1][0] in inverse_accent_map):
1017             # the last character of this line and the first of the next line
1018             # form probably a surrogate pair.
1019             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1020                 document.body[i] += document.body[i+1][0]
1021                 document.body[i+1] = document.body[i+1][1:]
1022
1023     # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1024     # This is needed to catch all accented characters.
1025     for i in range(numberoflines):
1026         # Unfortunately we have a mixture of unicode strings and plain strings,
1027         # because we never use u'xxx' for string literals, but 'xxx'.
1028         # Therefore we may have to try two times to normalize the data.
1029         try:
1030             document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1031         except TypeError:
1032             document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1033
1034     # Replace accented characters with InsetLaTeXAccent
1035     # Do not convert characters that can be represented in the chosen
1036     # encoding.
1037     encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1038     lang_re = re.compile(r"^\\lang\s(\S+)")
1039     for i in range(len(document.body)):
1040
1041         if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1042             # Track the encoding of the current line
1043             result = lang_re.match(document.body[i])
1044             if result:
1045                 language = result.group(1)
1046                 if language == "default":
1047                     encoding_stack[-1] = document.encoding
1048                 else:
1049                     from lyx2lyx_lang import lang
1050                     encoding_stack[-1] = lang[language][3]
1051                 continue
1052             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1053                 encoding_stack.append(encoding_stack[-1])
1054                 continue
1055             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1056                 del encoding_stack[-1]
1057                 continue
1058
1059         for j in range(len(document.body[i])):
1060             # dotless i and dotless j are both in special_accent_map and can
1061             # occur as an accented character, so we need to test that the
1062             # following character is no accent
1063             if (document.body[i][j] in inverse_special_accent_map and
1064                 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1065                 accent = document.body[i][j]
1066                 try:
1067                     dummy = accent.encode(encoding_stack[-1])
1068                 except UnicodeEncodeError:
1069                     # Insert the rest of the line as new line
1070                     if j < len(document.body[i]) - 1:
1071                         document.body[i+1:i+1] = document.body[i][j+1:]
1072                     # Delete the accented character
1073                     if j > 0:
1074                         document.body[i] = document.body[i][:j-1]
1075                     else:
1076                         document.body[i] = u''
1077                     # Finally add the InsetLaTeXAccent
1078                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1079                     break
1080             elif j > 0 and document.body[i][j] in inverse_accent_map:
1081                 accented_char = document.body[i][j-1]
1082                 if accented_char == ' ':
1083                     # Conform to LyX output
1084                     accented_char = ''
1085                 elif accented_char in inverse_accented_map:
1086                     accented_char = inverse_accented_map[accented_char]
1087                 accent = document.body[i][j]
1088                 try:
1089                     dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1090                 except UnicodeEncodeError:
1091                     # Insert the rest of the line as new line
1092                     if j < len(document.body[i]) - 1:
1093                         document.body[i+1:i+1] = document.body[i][j+1:]
1094                     # Delete the accented characters
1095                     if j > 1:
1096                         document.body[i] = document.body[i][:j-2]
1097                     else:
1098                         document.body[i] = u''
1099                     # Finally add the InsetLaTeXAccent
1100                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1101                     break
1102     # Normalize to "Normal form C" (NFC, pre-composed characters) again
1103     for i in range(numberoflines):
1104         document.body[i] = unicodedata.normalize("NFKC", document.body[i])
1105
1106
def normalize_font_whitespace_259(document):
    """ Move whitespace at the start or end of a font change outside of it.

    Before format 259 the font changes were ignored if a whitespace was the
    first or last character in the sequence. This handles series, emph,
    color, shape, bar and family changes."""

    # property -> value that resets the property
    return normalize_font_whitespace(document, {
        "\\series": "default",
        "\\emph": "default",
        "\\color": "none",
        "\\shape": "default",
        "\\bar": "default",
        "\\family": "default",
    })
1119
def normalize_font_whitespace_274(document):
    """ Move whitespace at the start or end of a language change outside
    of it.

    Before format 259 (sic) the font changes were ignored if a whitespace
    was the first or last character in the sequence. This was corrected
    for most font properties in format 259, but the language was forgotten
    then. The same conversion is applied to font language changes here."""

    # property -> value that resets the property
    language_property = {"\\lang": "default"}
    return normalize_font_whitespace(document, language_property)
1130
def get_paragraph_language(document, i):
    """ Return the language of the paragraph that contains line i of the
    document body.

    If the first nonempty line of the paragraph is a \\lang command, its
    argument is the paragraph's language; otherwise the document's language
    is returned."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first_text = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_text].split()

    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
1148
def normalize_font_whitespace(document, char_properties):
    """ Move whitespace at the start or end of a font change outside of it.

    Before format 259 the font changes were ignored if a whitespace was the
    first or last character in the sequence. `char_properties` maps each
    property that is handled to the value that resets it; changes of other
    properties are ignored. Nothing is done unless the backend is latex."""

    if document.backend != "latex":
        return

    body = document.body

    # properties that currently differ from their reset value
    active = {}

    def space_with_resets(current):
        """ Return the lines for a blank that is output with all active
        properties except `current` switched off, followed by the lines
        that switch them on again. """
        others = [name for name in active if name != current]
        others.reverse()
        switch_off = ["%s %s" % (name, char_properties[name]) for name in others]
        switch_on = ["%s %s" % (name, active[name]) for name in others]
        return switch_off + [" "] + switch_on

    pos = 0
    while pos < len(body):
        tokens = body[pos].split()

        if tokens and tokens[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            active.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties:
                char_properties["\\lang"] = \
                    get_paragraph_language(document, pos + 1)

        elif len(tokens) > 1 and tokens[0] in char_properties:
            # we have a font change
            name, value = tokens[0], tokens[1]
            is_reset = (char_properties[name] == value)
            if is_reset:
                active.pop(name, None)
            else:
                active[name] = value
            reset_line = "%s %s" % (name, char_properties[name])

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            previous = body[pos - 1]
            if previous and previous[-1] == " ":
                # a space before the font change
                body[pos - 1] = previous[:-1]
                extra = space_with_resets(name)
                if is_reset:
                    # Property is reset in this line, so add the new stuff afterwards
                    body[pos + 1:pos + 1] = extra
                else:
                    # Reset property for the space
                    extra.insert(0, reset_line)
                    body[pos:pos] = extra
                pos += len(extra)
            else:
                following = body[pos + 1]
                if following and following[0] == " " and (len(active) > 0 or not is_reset):
                    # a space after the font change
                    if following == " " and body[pos + 2]:
                        next_tokens = body[pos + 2].split()
                        if len(next_tokens) > 0 and next_tokens[0] == name:
                            # a single blank with a property different from the
                            # previous and the next line must not be changed
                            pos += 2
                            continue
                    body[pos + 1] = following[1:]
                    # Reset property for the space
                    extra = [reset_line] + space_with_resets(name)
                    body[pos:pos] = extra
                    pos += len(extra)

        pos += 1
1238
1239
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    header = document.header
    position = find_token(header, "\\inputencoding", 0)
    if position == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", position) == "utf8x":
        header[position] = "\\inputencoding utf8"
    # Keep the cached document attribute in sync with the header.
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1250
1251
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    header = document.header
    position = find_token(header, "\\inputencoding", 0)
    if position == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", position) == "utf8-plain":
        header[position] = "\\inputencoding utf8"
    # Keep the cached document attribute in sync with the header.
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1262
1263
def revert_beamer_alert(document):
    """ Revert beamer's \\alert inset back to ERT.

    A warning is issued and the conversion stops if the contents of an
    inset cannot be found. """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        # The command is wrapped around the line that follows the first
        # \begin_layout behind the start of the inset.
        i = find_token(document.body, "\\begin_layout", i + 1)
        if i == -1 or i + 1 >= len(document.body):
            # Do not run past the end of a truncated document.
            document.warning("Malformed LyX document: Could not find contents of CharStyle Alert inset.")
            return
        # Insert the \alert command
        document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
        i = i + 1
1281
1282
def revert_beamer_structure(document):
    """ Revert beamer's \\structure inset back to ERT.

    A warning is issued and the conversion stops if the contents of an
    inset cannot be found. """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        # The command is wrapped around the line that follows the first
        # \begin_layout behind the start of the inset.
        i = find_token(document.body, "\\begin_layout", i + 1)
        if i == -1 or i + 1 >= len(document.body):
            # Do not run past the end of a truncated document.
            document.warning("Malformed LyX document: Could not find contents of CharStyle Structure inset.")
            return
        # Insert the \structure command
        document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
        i = i + 1
1299
1300
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    header = document.header
    tracking_pos = find_token(header, '\\tracking_changes', 0)
    if tracking_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_off = get_value(header, "\\tracking_changes", tracking_pos) == "false"
    output_on = get_value(header, "\\output_changes", output_pos) == "true"
    if tracking_off and output_on:
        header[output_pos] = "\\output_changes false"
1315
1316
def revert_ascii(document):
    " Set ascii encoding to auto. "
    header = document.header
    position = find_token(header, "\\inputencoding", 0)
    if position == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", position) == "ascii":
        header[position] = "\\inputencoding auto"
    # Keep the cached document attribute in sync with the header.
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1327
1328
def normalize_language_name(document):
    " Rename the document language brazil -> brazilian and portuges -> portuguese. "
    lang = { "brazil": "brazilian",
             "portuges": "portuguese"}

    if document.language not in lang:
        return
    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # Without this check the last header line (an unrelated setting)
        # would be overwritten.
        document.warning("Malformed LyX document: Missing `\\language'.")
        return
    document.header[i] = "\\language %s" % document.language
1337
1338
def revert_language_name(document):
    " Rename the document language brazilian -> brazil and portuguese -> portuges. "
    lang = { "brazilian": "brazil",
             "portuguese": "portuges"}

    if document.language not in lang:
        return
    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # Without this check the last header line (an unrelated setting)
        # would be overwritten.
        document.warning("Malformed LyX document: Missing `\\language'.")
        return
    document.header[i] = "\\language %s" % document.language
1347
1348 #
1349 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " Rename the text class cv to simplecv. "
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1353
1354
def revert_cv_textclass(document):
    " Rename the text class simplecv back to cv. "
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1358
1359
1360 #
1361 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # The extent of this inset is unknown, so leave it alone.
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            # Write the parameter with a leading tab like the other graphics
            # parameters. revert_graphics_rotation searches for it in this
            # form.
            document.body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
1380
1381
1382 #
1383 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    " remove scaleBeforeRotation graphics parameter. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # The extent of this inset is unknown, so leave it alone.
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
        else:
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
            special = get_value(document.body, 'special', i + 1, j)
            has_size = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j) != -1
            # An inset without size parameters needs no change. Only this
            # inset is skipped then; the following insets are still handled.
            if rotateAngle != "" and has_size:
                if special == "":
                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                    # The end of the inset has moved down by one line.
                    j = j + 1
                else:
                    l = find_token(document.body, "\tspecial", i + 1, j)
                    # Guard against l == -1, which would modify the last
                    # line of the document.
                    if l != -1:
                        document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                k = find_token(document.body, "\trotateAngle", i + 1, j)
                if k != -1:
                    del document.body[k]
        i = i + 1
1417
1418
1419
def convert_tableborder(document):
    """Remove the "|" in front of ">{" from table lines with a left border.

    The problem: LyX doubled the table cell border because it ignored the
    "|" character in the cell arguments.  A fix takes care of this, and
    therefore the "|" has to be removed.

    Only lines of document.body that contain both leftline="true" and "|>{"
    are changed, and only the first "|>{" of such a line.  The body is
    modified in place.
    """
    for i, line in enumerate(document.body):
        k = line.find("|>{")
        # the two tokens have to be in one line
        if k != -1 and 'leftline="true"' in line:
            # delete the "|" and nothing else: the rest of the line,
            # including its last character, is kept
            document.body[i] = line[:k] + line[k + 1:]
1432
1433
def revert_tableborder(document):
    """Put a "|" in front of the first ">{" of table lines with a left border.

    A line of document.body is changed only when it contains both
    leftline="true" and ">{".  The body is modified in place.
    """
    body = document.body
    for idx, text in enumerate(body):
        # the two tokens have to be in one line
        if 'leftline="true"' not in text:
            continue
        pos = text.find(">{")
        if pos == -1:
            continue
        # add the "|"
        body[idx] = '%s|%s' % (text[:pos], text[pos:])
1444
1445
def revert_armenian(document):
    """Revert armenian documents: armscii8 encoding and armenian language.

    * An input encoding of armscii8 is changed to auto in the header.
    * For the language armenian, \\usepackage{armtex} is added to the
      preamble (as first line when the preamble already holds a line with
      a backslash or a percent sign, appended otherwise), and the document
      language is set to english.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"

    # check whether any preamble line holds a LaTeX command or a comment
    preamble_in_use = False
    for entry in document.preamble:
        if "\\" in entry or "%" in entry:
            preamble_in_use = True
            break

    if document.language != "armenian":
        return

    # add the entry \usepackage{armtex} to the document preamble
    if preamble_in_use:
        # set the armtex entry as the first preamble line
        document.preamble.insert(0, "\\usepackage{armtex}")
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
1476
1477
def revert_CJK(document):
    """Set CJK encodings to default and CJK languages to english.

    A missing \\inputencoding header line is added with the value auto; one
    holding a CJK encoding is reset to default.  document.inputencoding is
    then re-read from the header.  The languages chinese-simplified,
    chinese-traditional, japanese and korean are replaced by english.
    """
    cjk_encodings = ("Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP")
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")

    enc_pos = find_token(document.header, "\\inputencoding", 0)
    if enc_pos != -1:
        if get_value(document.header, "\\inputencoding", enc_pos) in cjk_encodings:
            document.header[enc_pos] = "\\inputencoding default"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language not in cjk_languages:
        return
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
1498
1499
def revert_preamble_listings_params(document):
    r"""Revert the header option \listings_params to preamble code.

    If the header holds a \listings_params line, its (optionally quoted)
    value is written to the preamble as \lstset{<value>}, preceded by
    \usepackage{listings} unless the preamble already has that line, and
    the header line is removed.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Take everything after the token: the quoted value may contain spaces,
    # and a line without any value must not raise.
    fields = document.header[i].split(None, 1)
    params = ''
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    if '\\usepackage{listings}' not in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % params)
    document.header.pop(i)
1507
1508
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are passed on as the listings
parameters caption={...} and label={...}.  \usepackage{listings} is added
to the preamble if it is not there yet.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # k is the first line of the listing content.  It defaults to the
        # line after \begin_inset so that it is defined even when the inset
        # has no header line at all.
        k = i + 1
        # first three lines (but never beyond the end of this inset)
        for line in range(i + 1, min(i + 4, j)):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
                k = line + 1
            if document.body[line].startswith('lstparams'):
                params = document.body[line].split()[1].strip('"')
                k = line + 1
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # Only a caption inside this inset counts: the search stops at the
        # end of the listings inset, so a caption of a later float is not
        # mistaken for ours.
        cap = find_token(document.body, '\\begin_inset Caption', i, j)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?  (again only inside the caption inset)
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # the caption text is everything around the label inset that is
            # not a LyX command line
            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            k = cap_end + 1
        # looking for the oneline code for lstinline: the line in front of
        # the \end_layout that closes the first Standard layout
        first_layout = find_token(document.body, '\\begin_layout Standard', i + 1)
        inlinecode = document.body[find_end_of_layout(document.body, first_layout + 1) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            # a backslash has to be written as a \backslash line in ERT
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout Standard',
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout Standard',
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout'
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout Standard',
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1656
1657
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

The options are written in front of the file name, and
\usepackage{listings} is added to the preamble if it is not there yet.
    '''
    # \lstinputlisting{file}[options]; the file name is everything up to
    # the closing brace, so paths containing "/" or "-" are accepted too
    pattern = re.compile(r'\\(lstinputlisting)\{([^}]*)\}(.*)')

    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]
        cmd, filename, option = '', '', ''
        # everything after "\begin_inset Include"; not cut at the first
        # space, so that options containing spaces survive
        command_line = document.body[i].split(None, 2)[2].strip()
        found = pattern.match(command_line)
        if found:
            cmd, filename, option = found.groups()
        # a backslash has to be written as a \backslash line in ERT
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout Standard',
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1708
1709
def revert_ext_font_sizes(document):
    """Move a 10/11/12 paper font size of an ext* class to the class options.

    Applies only to documents with the latex backend whose text class name
    starts with "ext".  \\paperfontsize is reset to default and "<size>pt"
    is appended to the \\options header line, which is created right after
    the \\textclass line if it does not exist.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    class_option = size + 'pt'

    size_pos = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_pos] = '\\paperfontsize default'

    opt_pos = find_token(document.header, '\\options', 0)
    if opt_pos != -1:
        document.header[opt_pos] += ',%s' % class_option
        return
    insert_at = find_token(document.header, '\\textclass', 0) + 1
    document.header.insert(insert_at, '\\options %s' % class_option)
1727
1728
def convert_ext_font_sizes(document):
    """Move a 10pt/11pt/12pt class option of an ext* class to \\paperfontsize.

    Applies only to documents with the latex backend whose text class name
    starts with "ext" and whose \\paperfontsize is default.  The first class
    option that is exactly 10pt, 11pt or 12pt is removed from the \\options
    header line (the line is deleted when no option remains) and its number
    becomes the \\paperfontsize value.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return
    if get_value(document.header, '\\paperfontsize', 0) != 'default':
        return

    opt_pos = find_token(document.header, '\\options', 0)
    if opt_pos == -1:
        return

    known_sizes = ('10pt', '11pt', '12pt')
    option_list = get_value(document.header, '\\options', opt_pos).split(',')
    size_options = [opt for opt in option_list if opt in known_sizes]
    if not size_options:
        # no font size among the class options
        return
    chosen = size_options[0]
    # the first occurrence of the value is the option that was found
    option_list.remove(chosen)

    size_pos = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_pos] = '\\paperfontsize %s' % chosen[:-2]

    if option_list:
        document.header[opt_pos] = '\\options %s' % ','.join(option_list)
    else:
        del document.header[opt_pos]
1764
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

Processing stops at the first --Separator-- layout whose end cannot be
found.
    '''
    # the note that marks the place of the former separator
    note_inset = [r'\begin_inset Note Note',
                  'status open',
                  '',
                  r'\begin_layout Standard',
                  'Separate Environment',
                  r'\end_layout',
                  '',
                  r'\end_inset']

    start = 0
    while True:
        start = find_token(document.body, r'\begin_layout --Separator--', start)
        if start == -1:
            return
        end = find_end_of_layout(document.body, start + 1)
        if end == -1:
            # this should not happen
            return
        content = document.body[start + 1 : end]
        replacement = [r'\begin_layout Standard']
        replacement.extend(note_inset)
        replacement.extend(content)
        replacement.extend(['', r'\end_layout'])
        document.body[start : end + 1] = replacement
1812
def convert_arabic(document):
    r"""Rename the language arabic to arabic_arabtex.

    The document language and its \language header line are updated, and
    every body line containing "\lang arabic" is replaced as a whole by
    "\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language arabic_arabtex"

    body = document.body
    for idx, text in enumerate(body):
        if r'\lang arabic' in text:
            # change the language name
            body[idx] = r'\lang arabic_arabtex'
1826
def revert_arabic(document):
    r"""Rename the language arabic_arabtex back to arabic.

    The document language and its \language header line are updated, and
    every body line containing "\lang arabic_arabtex" is replaced as a
    whole by "\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language arabic"

    body = document.body
    for idx, text in enumerate(body):
        if r'\lang arabic_arabtex' in text:
            # change the language name
            body[idx] = r'\lang arabic'
1840
def revert_unicode(document):
    """Transform unicode symbols according to the unicode list.

    The list is the file "unicodesymbols", looked up in the parent
    directory of the directory that holds the running script
    (sys.argv[0]).  Every character of document.body that has an entry in
    the list is replaced by its command; depending on whether the character
    is inside an ERT inset, inside a math inset or in ordinary text, the
    command is written directly or wrapped in a new ERT or Formula inset.
    The body is modified in place.

    Preamble flags are not implemented.
    Characters flagged as "combining" are removed from the text.
    Forced output is currently not enforced.

    Raises IOError/OSError if the unicodesymbols file cannot be opened.
    """
    # unichr() only exists on Python 2; chr() is its Python 3 counterpart.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # The list lives next to the directory of the running script.  Take the
    # parent directory explicitly: str.strip('lyx2lyx') removes a *set of
    # characters* from both ends of the path and mangles for instance
    # "lib/lyx2lyx" to "ib/".
    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
    symbols_path = os.path.join(os.path.dirname(script_dir), 'unicodesymbols')

    spec_chars = {}
    fp = open(symbols_path, 'r')
    try:
        for line in fp.readlines():
            if line[0] == '#':
                continue
            line = line.replace(' "', ' ') # remove all quotation marks with spaces before
            line = line.replace('" ', ' ') # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"') # replace \" by " (for characters with diaresis)
            try:
                # flag1 and flag2 are preamble & flags
                # currently NOT implemented
                ucs4, command, flag1, flag2 = line.split(None, 3)
                # base 0: the code point is parsed as a number literal
                # ("0x00a0") without evaluating file content as code
                spec_chars[to_char(int(ucs4, 0))] = [command, flag1, flag2]
            except (ValueError, OverflowError):
                # incomplete entry, no number, or code point out of range:
                # such lines are skipped
                continue
    finally:
        fp.close()

    # Define strings to start and end ERT and math insets
    ert_intro = '\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
    ert_outro = '\n\\end_layout\n\n\\end_inset\n\n'
    math_intro = '\n\\begin_inset Formula $'
    math_outro = '$\n\\end_inset\n'
    # Find unicode characters and replace them
    in_ert = 0 # flag set to 1 if in ERT inset
    in_math = 0 # flag set to 1 if in math inset
    insets = [] # list of active insets
    for i, current_line in enumerate(document.body):
        if current_line.find('\\begin_inset') > -1:
            # check which inset to start
            if current_line.find('\\begin_inset ERT') > -1:
                in_ert = 1
                insets.append('ert')
            elif current_line.find('\\begin_inset Formula') > -1:
                in_math = 1
                insets.append('math')
            else:
                insets.append('other')
        # an \end_inset without an open inset is ignored
        if current_line.find('\\end_inset') > -1 and insets:
            # check which inset to end
            cur_inset = insets.pop()
            if cur_inset == 'ert':
                in_ert = 0
            elif cur_inset == 'math':
                in_math = 0
        converted = [] # pieces of the modified line
        for char in current_line:
            if char not in spec_chars:
                converted.append(char)
                continue
            command, flag1, flag2 = spec_chars[char]
            if (flag1 + flag2).find('combining') > -1:
                # combining characters are dropped
                continue
            # commands are stored with a doubled leading backslash
            if command[0:2] == '\\\\':
                if command[2:12] == 'ensuremath':
                    if in_ert == 1:
                        # math in ERT
                        command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                        command = command.replace('}', '$\n')
                    elif in_math == 0:
                        # add a math inset with the replacement character
                        command = command.replace('\\\\ensuremath{\\', math_intro)
                        command = command.replace('}', math_outro)
                    else:
                        # we are already in a math inset
                        command = command.replace('\\\\ensuremath{\\', '')
                        command = command.replace('}', '')
                else:
                    if in_math == 1:
                        # avoid putting an ERT in a math; instead put command as text
                        command = command.replace('\\\\', '\\mathrm{')
                        command = command + '}'
                    elif in_ert == 0:
                        # add an ERT inset with the replacement character
                        command = command.replace('\\\\', ert_intro)
                        command = command + ert_outro
                    else:
                        command = command.replace('\\\\', '\n\\backslash\n')
            converted.append(command)
        document.body[i] = ''.join(converted)
1931
1932
##
# Conversion hub
#
# The tables below list the conversion functions of this module.  Each
# table entry is a pair [number, [functions]].
# NOTE(review): the code that consumes these tables is not part of this
# file; presumably the number is the file format that is reached by running
# the listed functions in the given order -- confirm against the lyx2lyx
# driver.

# Version strings of the LyX release this module belongs to.
supported_versions = ["1.5.0","1.5"]

# Forward direction: entries in ascending order, 246 up to 276.  An empty
# function list means that no function of this module is run for that
# number.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Backward direction: entries in descending order, 275 down to 245.  The
# listings functions appear for both 271 and 268.
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


# This module is a function library; running it as a script does nothing.
if __name__ == "__main__":
    pass
2007
2008