lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     if document.cjk_encoding != '':
 250         return
 251     encoding_stack = [document.encoding]
 252     lang_re = re.compile(r"^\\lang\s(\S+)")
 253     if document.inputencoding == "auto" or document.inputencoding == "default":
 254         for i in range(len(document.body)):
 255             result = lang_re.match(document.body[i])
 256             if result:
 257                 language = result.group(1)
 258                 if language == "default":
 259                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 260                     encoding_stack[-1] = document.encoding
 261                 else:
 262                     from lyx2lyx_lang import lang
 263                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 264                     encoding_stack[-1] = lang[language][3]
 265             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 266                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 267                 encoding_stack.append(encoding_stack[-1])
 268             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 269                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 270                 if len(encoding_stack) == 1:
 271                     # Don't remove the document encoding from the stack
 272                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 273                 else:
 274                     del encoding_stack[-1]
 275             if encoding_stack[-1] != document.encoding:
 276                 if forward:
 277                     # This line has been incorrectly interpreted as if it was
 278                     # encoded in 'encoding'.
 279                     # Convert back to the 8bit string that was in the file.
 280                     orig = document.body[i].encode(document.encoding)
 281                     # Convert the 8bit string that was in the file to unicode
 282                     # with the correct encoding.
 283                     document.body[i] = orig.decode(encoding_stack[-1])
 284                 else:
 285                     # Convert unicode to the 8bit string that will be written
 286                     # to the file with the correct encoding.
 287                     orig = document.body[i].encode(encoding_stack[-1])
 288                     # Convert the 8bit string that will be written to the
 289                     # file to fake unicode with the encoding that will later
 290                     # be used when writing to the file.
 291                     document.body[i] = orig.decode(document.encoding)
 292
 293
 294 def convert_utf8(document):
 295     " Set document encoding to UTF-8. "
 296     convert_multiencoding(document, True)
 297     document.encoding = "utf8"
 298
 299
 300 def revert_utf8(document):
 301     " Set document encoding to the value corresponding to inputencoding. "
 302     i = find_token(document.header, "\\inputencoding", 0)
 303     if i == -1:
 304         document.header.append("\\inputencoding auto")
 305     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 306         document.header[i] = "\\inputencoding auto"
 307     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 308     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 309     convert_multiencoding(document, False)
 310
 311
 312 def revert_cs_label(document):
 313     " Remove status flag of charstyle label. "
 314     i = 0
 315     while 1:
 316         i = find_token(document.body, "\\begin_inset CharStyle", i)
 317         if i == -1:
 318             return
 319         # Seach for a line starting 'show_label'
 320         # If it is not there, break with a warning message
 321         i = i + 1
 322         while 1:
 323             if (document.body[i][:10] == "show_label"):
 324                 del document.body[i]
 325                 break
 326             elif (document.body[i][:13] == "\\begin_layout"):
 327                 document.warning("Malformed LyX document: Missing 'show_label'.")
 328                 break
 329             i = i + 1
 330
 331         i = i + 1
 332
 333
 334 def convert_bibitem(document):
 335     """ Convert
 336 \bibitem [option]{argument}
 337
 338 to
 339
 340 \begin_inset LatexCommand bibitem
 341 label "option"
 342 key "argument"
 343
 344 \end_inset
 345
 346 This must be called after convert_commandparams.
 347 """
 348     i = 0
 349     while 1:
 350         i = find_token(document.body, "\\bibitem", i)
 351         if i == -1:
 352             break
 353         j = document.body[i].find('[') + 1
 354         k = document.body[i].rfind(']')
 355         if j == 0: # No optional argument found
 356             option = None
 357         else:
 358             option = document.body[i][j:k]
 359         j = document.body[i].rfind('{') + 1
 360         k = document.body[i].rfind('}')
 361         argument = document.body[i][j:k]
 362         lines = ['\\begin_inset LatexCommand bibitem']
 363         if option != None:
 364             lines.append('label "%s"' % option.replace('"', '\\"'))
 365         lines.append('key "%s"' % argument.replace('"', '\\"'))
 366         lines.append('')
 367         lines.append('\\end_inset')
 368         document.body[i:i+1] = lines
 369         i = i + 1
 370
 371
 372 commandparams_info = {
 373     # command : [option1, option2, argument]
 374     "bibitem" : ["label", "", "key"],
 375     "bibtex" : ["options", "btprint", "bibfiles"],
 376     "cite"        : ["after", "before", "key"],
 377     "citet"       : ["after", "before", "key"],
 378     "citep"       : ["after", "before", "key"],
 379     "citealt"     : ["after", "before", "key"],
 380     "citealp"     : ["after", "before", "key"],
 381     "citeauthor"  : ["after", "before", "key"],
 382     "citeyear"    : ["after", "before", "key"],
 383     "citeyearpar" : ["after", "before", "key"],
 384     "citet*"      : ["after", "before", "key"],
 385     "citep*"      : ["after", "before", "key"],
 386     "citealt*"    : ["after", "before", "key"],
 387     "citealp*"    : ["after", "before", "key"],
 388     "citeauthor*" : ["after", "before", "key"],
 389     "Citet"       : ["after", "before", "key"],
 390     "Citep"       : ["after", "before", "key"],
 391     "Citealt"     : ["after", "before", "key"],
 392     "Citealp"     : ["after", "before", "key"],
 393     "Citeauthor"  : ["after", "before", "key"],
 394     "Citet*"      : ["after", "before", "key"],
 395     "Citep*"      : ["after", "before", "key"],
 396     "Citealt*"    : ["after", "before", "key"],
 397     "Citealp*"    : ["after", "before", "key"],
 398     "Citeauthor*" : ["after", "before", "key"],
 399     "citefield"   : ["after", "before", "key"],
 400     "citetitle"   : ["after", "before", "key"],
 401     "cite*"       : ["after", "before", "key"],
 402     "hfill" : ["", "", ""],
 403     "index"      : ["", "", "name"],
 404     "printindex" : ["", "", "name"],
 405     "label" : ["", "", "name"],
 406     "eqref"     : ["name", "", "reference"],
 407     "pageref"   : ["name", "", "reference"],
 408     "prettyref" : ["name", "", "reference"],
 409     "ref"       : ["name", "", "reference"],
 410     "vpageref"  : ["name", "", "reference"],
 411     "vref"      : ["name", "", "reference"],
 412     "tableofcontents" : ["", "", "type"],
 413     "htmlurl" : ["name", "", "target"],
 414     "url"     : ["name", "", "target"]}
 415
 416
 417 def convert_commandparams(document):
 418     """ Convert
 419
 420  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 421  \end_inset
 422
 423  to
 424
 425  \begin_inset LatexCommand cmdname
 426  name1 "opt1"
 427  name2 "opt2"
 428  name3 "arg"
 429  \end_inset
 430
 431  name1, name2 and name3 can be different for each command.
 432 """
 433     # \begin_inset LatexCommand bibitem was not the official version (see
 434     # convert_bibitem()), but could be read in, so we convert it here, too.
 435
 436     i = 0
 437     while 1:
 438         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 439         if i == -1:
 440             break
 441         command = document.body[i][26:].strip()
 442         if command == "":
 443             document.warning("Malformed LyX document: Missing LatexCommand name.")
 444             i = i + 1
 445             continue
 446
 447         # The following parser is taken from the original InsetCommandParams::scanCommand
 448         name = ""
 449         option1 = ""
 450         option2 = ""
 451         argument = ""
 452         state = "WS"
 453         # Used to handle things like \command[foo[bar]]{foo{bar}}
 454         nestdepth = 0
 455         b = 0
 456         for c in command:
 457             if ((state == "CMDNAME" and c == ' ') or
 458                 (state == "CMDNAME" and c == '[') or
 459                 (state == "CMDNAME" and c == '{')):
 460                 state = "WS"
 461             if ((state == "OPTION" and c == ']') or
 462                 (state == "SECOPTION" and c == ']') or
 463                 (state == "CONTENT" and c == '}')):
 464                 if nestdepth == 0:
 465                     state = "WS"
 466                 else:
 467                     nestdepth = nestdepth - 1
 468             if ((state == "OPTION" and c == '[') or
 469                 (state == "SECOPTION" and c == '[') or
 470                 (state == "CONTENT" and c == '{')):
 471                 nestdepth = nestdepth + 1
 472             if state == "CMDNAME":
 473                     name += c
 474             elif state == "OPTION":
 475                     option1 += c
 476             elif state == "SECOPTION":
 477                     option2 += c
 478             elif state == "CONTENT":
 479                     argument += c
 480             elif state == "WS":
 481                 if c == '\\':
 482                     state = "CMDNAME"
 483                 elif c == '[' and b != ']':
 484                     state = "OPTION"
 485                     nestdepth = 0 # Just to be sure
 486                 elif c == '[' and b == ']':
 487                     state = "SECOPTION"
 488                     nestdepth = 0 # Just to be sure
 489                 elif c == '{':
 490                     state = "CONTENT"
 491                     nestdepth = 0 # Just to be sure
 492             b = c
 493
 494         # Now we have parsed the command, output the parameters
 495         lines = ["\\begin_inset LatexCommand %s" % name]
 496         if option1 != "":
 497             if commandparams_info[name][0] == "":
 498                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 499             else:
 500                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 501         if option2 != "":
 502             if commandparams_info[name][1] == "":
 503                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 504             else:
 505                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 506         if argument != "":
 507             if commandparams_info[name][2] == "":
 508                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 509             else:
 510                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 511         document.body[i:i+1] = lines
 512         i = i + 1
 513
 514
 515 def revert_commandparams(document):
 516     regex = re.compile(r'(\S+)\s+(.+)')
 517     i = 0
 518     while 1:
 519         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 520         if i == -1:
 521             break
 522         name = document.body[i].split()[2]
 523         j = find_end_of_inset(document.body, i + 1)
 524         preview_line = ""
 525         option1 = ""
 526         option2 = ""
 527         argument = ""
 528         for k in range(i + 1, j):
 529             match = re.match(regex, document.body[k])
 530             if match:
 531                 pname = match.group(1)
 532                 pvalue = match.group(2)
 533                 if pname == "preview":
 534                     preview_line = document.body[k]
 535                 elif (commandparams_info[name][0] != "" and
 536                       pname == commandparams_info[name][0]):
 537                     option1 = pvalue.strip('"').replace('\\"', '"')
 538                 elif (commandparams_info[name][1] != "" and
 539                       pname == commandparams_info[name][1]):
 540                     option2 = pvalue.strip('"').replace('\\"', '"')
 541                 elif (commandparams_info[name][2] != "" and
 542                       pname == commandparams_info[name][2]):
 543                     argument = pvalue.strip('"').replace('\\"', '"')
 544             elif document.body[k].strip() != "":
 545                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 546         if name == "bibitem":
 547             if option1 == "":
 548                 lines = ["\\bibitem {%s}" % argument]
 549             else:
 550                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 551         else:
 552             if option1 == "":
 553                 if option2 == "":
 554                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 555                 else:
 556                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 557             else:
 558                 if option2 == "":
 559                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 560                 else:
 561                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 562         if name != "bibitem":
 563             if preview_line != "":
 564                 lines.append(preview_line)
 565             lines.append('')
 566             lines.append('\\end_inset')
 567         document.body[i:j+1] = lines
 568         i = j + 1
 569
 570
 571 def revert_nomenclature(document):
 572     " Convert nomenclature entry to ERT. "
 573     regex = re.compile(r'(\S+)\s+(.+)')
 574     i = 0
 575     use_nomencl = 0
 576     while 1:
 577         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 578         if i == -1:
 579             break
 580         use_nomencl = 1
 581         j = find_end_of_inset(document.body, i + 1)
 582         preview_line = ""
 583         symbol = ""
 584         description = ""
 585         prefix = ""
 586         for k in range(i + 1, j):
 587             match = re.match(regex, document.body[k])
 588             if match:
 589                 name = match.group(1)
 590                 value = match.group(2)
 591                 if name == "preview":
 592                     preview_line = document.body[k]
 593                 elif name == "symbol":
 594                     symbol = value.strip('"').replace('\\"', '"')
 595                 elif name == "description":
 596                     description = value.strip('"').replace('\\"', '"')
 597                 elif name == "prefix":
 598                     prefix = value.strip('"').replace('\\"', '"')
 599             elif document.body[k].strip() != "":
 600                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 601         if prefix == "":
 602             command = 'nomenclature{%s}{%s}' % (symbol, description)
 603         else:
 604             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 605         document.body[i:j+1] = ['\\begin_inset ERT',
 606                                 'status collapsed',
 607                                 '',
 608                                 '\\begin_layout %s' % document.default_layout,
 609                                 '',
 610                                 '',
 611                                 '\\backslash',
 612                                 command,
 613                                 '\\end_layout',
 614                                 '',
 615                                 '\\end_inset']
 616         i = i + 11
 617     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 618         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 619         document.preamble.append('\\makenomenclature')
 620
 621
 622 def revert_printnomenclature(document):
 623     " Convert printnomenclature to ERT. "
 624     regex = re.compile(r'(\S+)\s+(.+)')
 625     i = 0
 626     use_nomencl = 0
 627     while 1:
 628         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 629         if i == -1:
 630             break
 631         use_nomencl = 1
 632         j = find_end_of_inset(document.body, i + 1)
 633         preview_line = ""
 634         labelwidth = ""
 635         for k in range(i + 1, j):
 636             match = re.match(regex, document.body[k])
 637             if match:
 638                 name = match.group(1)
 639                 value = match.group(2)
 640                 if name == "preview":
 641                     preview_line = document.body[k]
 642                 elif name == "labelwidth":
 643                     labelwidth = value.strip('"').replace('\\"', '"')
 644             elif document.body[k].strip() != "":
 645                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 646         if labelwidth == "":
 647             command = 'nomenclature{}'
 648         else:
 649             command = 'nomenclature[%s]' % labelwidth
 650         document.body[i:j+1] = ['\\begin_inset ERT',
 651                                 'status collapsed',
 652                                 '',
 653                                 '\\begin_layout %s' % document.default_layout,
 654                                 '',
 655                                 '',
 656                                 '\\backslash',
 657                                 command,
 658                                 '\\end_layout',
 659                                 '',
 660                                 '\\end_inset']
 661         i = i + 11
 662     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 663         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 664         document.preamble.append('\\makenomenclature')
 665
 666
 667 def convert_esint(document):
 668     " Add \\use_esint setting to header. "
 669     i = find_token(document.header, "\\cite_engine", 0)
 670     if i == -1:
 671         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 672         return
 673     # 0 is off, 1 is auto, 2 is on.
 674     document.header.insert(i, '\\use_esint 0')
 675
 676
 677 def revert_esint(document):
 678     " Remove \\use_esint setting from header. "
 679     i = find_token(document.header, "\\use_esint", 0)
 680     if i == -1:
 681         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 682         return
 683     use_esint = document.header[i].split()[1]
 684     del document.header[i]
 685     # 0 is off, 1 is auto, 2 is on.
 686     if (use_esint == 2):
 687         document.preamble.append('\\usepackage{esint}')
 688
 689
 690 def revert_clearpage(document):
 691     " clearpage -> ERT "
 692     i = 0
 693     while 1:
 694         i = find_token(document.body, "\\clearpage", i)
 695         if i == -1:
 696             break
 697         document.body[i:i+1] =  ['\\begin_inset ERT',
 698                                 'status collapsed',
 699                                 '',
 700                                 '\\begin_layout %s' % document.default_layout,
 701                                 '',
 702                                 '',
 703                                 '\\backslash',
 704                                 'clearpage',
 705                                 '\\end_layout',
 706                                 '',
 707                                 '\\end_inset']
 708     i = i + 1
 709
 710
 711 def revert_cleardoublepage(document):
 712     " cleardoublepage -> ERT "
 713     i = 0
 714     while 1:
 715         i = find_token(document.body, "\\cleardoublepage", i)
 716         if i == -1:
 717             break
 718         document.body[i:i+1] =  ['\\begin_inset ERT',
 719                                 'status collapsed',
 720                                 '',
 721                                 '\\begin_layout %s' % document.default_layout,
 722                                 '',
 723                                 '',
 724                                 '\\backslash',
 725                                 'cleardoublepage',
 726                                 '\\end_layout',
 727                                 '',
 728                                 '\\end_inset']
 729     i = i + 1
 730
 731
 732 def convert_lyxline(document):
 733     " remove fontsize commands for \lyxline "
 734     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 735     # to change the line thickness. The new definiton does this so that imported
 736     # \lyxlines would have a different line thickness. The eventual fontsize command
 737     # before \lyxline is therefore removed to get the same output.
 738     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 739                  "large", "Large", "LARGE", "huge", "Huge"]
 740     for n in range(0, len(fontsizes)):
 741         i = 0
 742         k = 0
 743         while i < len(document.body):
 744             i = find_token(document.body, "\\size " + fontsizes[n], i)
 745             k = find_token(document.body, "\\lyxline", i)
 746             # the corresponding fontsize command is always 2 lines before the \lyxline
 747             if (i != -1 and k == i+2):
 748                 document.body[i:i+1] = []
 749             else:
 750                 break
 751         i = i + 1
 752
 753
 754 def revert_encodings(document):
 755     " Set new encodings to auto. "
 756     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 757                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 758                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 759     i = find_token(document.header, "\\inputencoding", 0)
 760     if i == -1:
 761         document.header.append("\\inputencoding auto")
 762     else:
 763         inputenc = get_value(document.header, "\\inputencoding", i)
 764         if inputenc in encodings:
 765             document.header[i] = "\\inputencoding auto"
 766     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 767
 768
 769 def convert_caption(document):
 770     " Convert caption layouts to caption insets. "
 771     i = 0
 772     while 1:
 773         i = find_token(document.body, "\\begin_layout Caption", i)
 774         if i == -1:
 775             return
 776         j = find_end_of_layout(document.body, i)
 777         if j == -1:
 778             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 779             return
 780
 781         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 782         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 783                             "\\begin_inset Caption", "",
 784                             "\\begin_layout %s" % document.default_layout]
 785         i = i + 1
 786
 787
 788 def revert_caption(document):
 789     " Convert caption insets to caption layouts. "
 790     " This assumes that the text class has a caption style. "
 791     i = 0
 792     while 1:
 793         i = find_token(document.body, "\\begin_inset Caption", i)
 794         if i == -1:
 795             return
 796
 797         # We either need to delete the previous \begin_layout line, or we
 798         # need to end the previous layout if this inset is not in the first
 799         # position of the paragraph.
 800         layout_before = find_token_backwards(document.body, "\\begin_layout", i)
 801         if layout_before == -1:
 802             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 803             return
 804         layout_line = document.body[layout_before]
 805         del_layout_before = True
 806         l = layout_before + 1
 807         while l < i:
 808             if document.body[l] != "":
 809                 del_layout_before = False
 810                 break
 811             l = l + 1
 812         if del_layout_before:
 813             del document.body[layout_before:i]
 814             i = layout_before
 815         else:
 816             document.body[i:i] = ["\\end_layout", ""]
 817             i = i + 2
 818
 819         # Find start of layout in the inset and end of inset
 820         j = find_token(document.body, "\\begin_layout", i)
 821         if j == -1:
 822             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 823             return
 824         k = find_end_of_inset(document.body, i)
 825         if k == -1:
 826             document.warning("Malformed LyX document: Missing `\\end_inset'.")
 827             return
 828
 829         # We either need to delete the following \end_layout line, or we need
 830         # to restart the old layout if this inset is not at the paragraph end.
 831         layout_after = find_token(document.body, "\\end_layout", k)
 832         if layout_after == -1:
 833             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 834             return
 835         del_layout_after = True
 836         l = k + 1
 837         while l < layout_after:
 838             if document.body[l] != "":
 839                 del_layout_after = False
 840                 break
 841             l = l + 1
 842         if del_layout_after:
 843             del document.body[k+1:layout_after+1]
 844         else:
 845             document.body[k+1:k+1] = [layout_line, ""]
 846
 847         # delete \begin_layout and \end_inset and replace \begin_inset with
 848         # "\begin_layout Caption". This works because we can only have one
 849         # paragraph in the caption inset: The old \end_layout will be recycled.
 850         del document.body[k]
 851         if document.body[k] == "":
 852             del document.body[k]
 853         del document.body[j]
 854         if document.body[j] == "":
 855             del document.body[j]
 856         document.body[i] = "\\begin_layout Caption"
 857         if document.body[i+1] == "":
 858             del document.body[i+1]
 859         i = i + 1
 860
 861
 862 # Accents of InsetLaTeXAccent
 863 accent_map = {
 864     "`" : u'\u0300', # grave
 865     "'" : u'\u0301', # acute
 866     "^" : u'\u0302', # circumflex
 867     "~" : u'\u0303', # tilde
 868     "=" : u'\u0304', # macron
 869     "u" : u'\u0306', # breve
 870     "." : u'\u0307', # dot above
 871     "\"": u'\u0308', # diaresis
 872     "r" : u'\u030a', # ring above
 873     "H" : u'\u030b', # double acute
 874     "v" : u'\u030c', # caron
 875     "b" : u'\u0320', # minus sign below
 876     "d" : u'\u0323', # dot below
 877     "c" : u'\u0327', # cedilla
 878     "k" : u'\u0328', # ogonek
 879     "t" : u'\u0361'  # tie. This is special: It spans two characters, but
 880                      # only one is given as argument, so we don't need to
 881                      # treat it differently.
 882 }
 883
 884
 885 # special accents of InsetLaTeXAccent without argument
 886 special_accent_map = {
 887     'i' : u'\u0131', # dotless i
 888     'j' : u'\u0237', # dotless j
 889     'l' : u'\u0142', # l with stroke
 890     'L' : u'\u0141'  # L with stroke
 891 }
 892
 893
 894 # special accent arguments of InsetLaTeXAccent
 895 accented_map = {
 896     '\\i' : u'\u0131', # dotless i
 897     '\\j' : u'\u0237'  # dotless j
 898 }
 899
 900
 901 def _convert_accent(accent, accented_char):
 902     type = accent
 903     char = accented_char
 904     if char == '':
 905         if type in special_accent_map:
 906             return special_accent_map[type]
 907         # a missing char is treated as space by LyX
 908         char = ' '
 909     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
 910         # Special caron, only used with t, d, l and L.
 911         # It is not in the map because we convert it to the same unicode
 912         # character as the normal caron: \q{} is only defined if babel with
 913         # the czech or slovak language is used, and the normal caron
 914         # produces the correct output if the T1 font encoding is used.
 915         # For the same reason we never convert to \q{} in the other direction.
 916         type = 'v'
 917     elif char in accented_map:
 918         char = accented_map[char]
 919     elif (len(char) > 1):
 920         # We can only convert accents on a single char
 921         return ''
 922     a = accent_map.get(type)
 923     if a:
 924         return unicodedata.normalize("NFKC", "%s%s" % (char, a))
 925     return ''
 926
 927
 928 def convert_ertbackslash(body, i, ert, default_layout):
 929     r""" -------------------------------------------------------------------------------------------
 930     Convert backslashes and '\n' into valid ERT code, append the converted
 931     text to body[i] and return the (maybe incremented) line index i"""
 932
 933     for c in ert:
 934         if c == '\\':
 935             body[i] = body[i] + '\\backslash '
 936             i = i + 1
 937             body.insert(i, '')
 938         elif c == '\n':
 939             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
 940             i = i + 4
 941         else:
 942             body[i] = body[i] + c
 943     return i
 944
 945
 946 def convert_accent(document):
 947     # The following forms are supported by LyX:
 948     # '\i \"{a}' (standard form, as written by LyX)
 949     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
 950     # '\i \"{ }' (also accepted if the accented char is a space)
 951     # '\i \" a'  (also accepted)
 952     # '\i \"'    (also accepted)
 953     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
 954     re_contents = re.compile(r'^([^\s{]+)(.*)$')
 955     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
 956     i = 0
 957     while 1:
 958         i = find_re(document.body, re_wholeinset, i)
 959         if i == -1:
 960             return
 961         match = re_wholeinset.match(document.body[i])
 962         prefix = match.group(1)
 963         contents = match.group(3).strip()
 964         match = re_contents.match(contents)
 965         if match:
 966             # Strip first char (always \)
 967             accent = match.group(1)[1:]
 968             accented_contents = match.group(2).strip()
 969             match = re_accentedcontents.match(accented_contents)
 970             accented_char = match.group(1)
 971             converted = _convert_accent(accent, accented_char)
 972             if converted == '':
 973                 # Normalize contents
 974                 contents = '%s{%s}' % (accent, accented_char),
 975             else:
 976                 document.body[i] = '%s%s' % (prefix, converted)
 977                 i += 1
 978                 continue
 979         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
 980         document.body[i] = prefix
 981         document.body[i+1:i+1] = ['\\begin_inset ERT',
 982                                   'status collapsed',
 983                                   '',
 984                                   '\\begin_layout %s' % document.default_layout,
 985                                   '',
 986                                   '',
 987                                   '']
 988         i = convert_ertbackslash(document.body, i + 7,
 989                                  '\\%s' % contents,
 990                                  document.default_layout)
 991         document.body[i+1:i+1] = ['\\end_layout',
 992                                   '',
 993                                   '\\end_inset']
 994         i += 3
 995
 996
 997 def revert_accent(document):
 998     inverse_accent_map = {}
 999     for k in accent_map:
1000         inverse_accent_map[accent_map[k]] = k
1001     inverse_special_accent_map = {}
1002     for k in special_accent_map:
1003         inverse_special_accent_map[special_accent_map[k]] = k
1004     inverse_accented_map = {}
1005     for k in accented_map:
1006         inverse_accented_map[accented_map[k]] = k
1007
1008     # Since LyX may insert a line break within a word we must combine all
1009     # words before unicode normalization.
1010     # We do this only if the next line starts with an accent, otherwise we
1011     # would create things like '\begin_inset ERTstatus'.
1012     numberoflines = len(document.body)
1013     for i in range(numberoflines-1):
1014         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1015             continue
1016         if (document.body[i+1][0] in inverse_accent_map):
1017             # the last character of this line and the first of the next line
1018             # form probably a surrogate pair.
1019             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1020                 document.body[i] += document.body[i+1][0]
1021                 document.body[i+1] = document.body[i+1][1:]
1022
1023     # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1024     # This is needed to catch all accented characters.
1025     for i in range(numberoflines):
1026         # Unfortunately we have a mixture of unicode strings and plain strings,
1027         # because we never use u'xxx' for string literals, but 'xxx'.
1028         # Therefore we may have to try two times to normalize the data.
1029         try:
1030             document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1031         except TypeError:
1032             document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1033
1034     # Replace accented characters with InsetLaTeXAccent
1035     # Do not convert characters that can be represented in the chosen
1036     # encoding.
1037     encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1038     lang_re = re.compile(r"^\\lang\s(\S+)")
1039     for i in range(len(document.body)):
1040
1041         if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1042             # Track the encoding of the current line
1043             result = lang_re.match(document.body[i])
1044             if result:
1045                 language = result.group(1)
1046                 if language == "default":
1047                     encoding_stack[-1] = document.encoding
1048                 else:
1049                     from lyx2lyx_lang import lang
1050                     encoding_stack[-1] = lang[language][3]
1051                 continue
1052             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1053                 encoding_stack.append(encoding_stack[-1])
1054                 continue
1055             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1056                 del encoding_stack[-1]
1057                 continue
1058
1059         for j in range(len(document.body[i])):
1060             # dotless i and dotless j are both in special_accent_map and can
1061             # occur as an accented character, so we need to test that the
1062             # following character is no accent
1063             if (document.body[i][j] in inverse_special_accent_map and
1064                 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1065                 accent = document.body[i][j]
1066                 try:
1067                     dummy = accent.encode(encoding_stack[-1])
1068                 except UnicodeEncodeError:
1069                     # Insert the rest of the line as new line
1070                     if j < len(document.body[i]) - 1:
1071                         document.body[i+1:i+1] = document.body[i][j+1:]
1072                     # Delete the accented character
1073                     if j > 0:
1074                         document.body[i] = document.body[i][:j-1]
1075                     else:
1076                         document.body[i] = u''
1077                     # Finally add the InsetLaTeXAccent
1078                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1079                     break
1080             elif j > 0 and document.body[i][j] in inverse_accent_map:
1081                 accented_char = document.body[i][j-1]
1082                 if accented_char == ' ':
1083                     # Conform to LyX output
1084                     accented_char = ''
1085                 elif accented_char in inverse_accented_map:
1086                     accented_char = inverse_accented_map[accented_char]
1087                 accent = document.body[i][j]
1088                 try:
1089                     dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1090                 except UnicodeEncodeError:
1091                     # Insert the rest of the line as new line
1092                     if j < len(document.body[i]) - 1:
1093                         document.body[i+1:i+1] = document.body[i][j+1:]
1094                     # Delete the accented characters
1095                     if j > 1:
1096                         document.body[i] = document.body[i][:j-2]
1097                     else:
1098                         document.body[i] = u''
1099                     # Finally add the InsetLaTeXAccent
1100                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1101                     break
1102     # Normalize to "Normal form C" (NFC, pre-composed characters) again
1103     for i in range(numberoflines):
1104         document.body[i] = unicodedata.normalize("NFKC", document.body[i])
1105
1106
1107 def normalize_font_whitespace_259(document):
1108     """ Before format 259 the font changes were ignored if a
1109     whitespace was the first or last character in the sequence, this function
1110     transfers the whitespace outside."""
1111
1112     char_properties = {"\\series": "default",
1113                        "\\emph": "default",
1114                        "\\color": "none",
1115                        "\\shape": "default",
1116                        "\\bar": "default",
1117                        "\\family": "default"}
1118     return normalize_font_whitespace(document, char_properties)
1119
1120 def normalize_font_whitespace_274(document):
1121     """ Before format 259 (sic) the font changes were ignored if a
1122     whitespace was the first or last character in the sequence. This was
1123     corrected for most font properties in format 259, but the language
1124     was forgotten then. This function applies the same conversion done
1125     there (namely, transfers the whitespace outside) for font language
1126     changes, as well."""
1127
1128     char_properties = {"\\lang": "default"}
1129     return normalize_font_whitespace(document, char_properties)
1130
1131 def get_paragraph_language(document, i):
1132     """ Return the language of the paragraph in which line i of the document
1133     body is. If the first thing in the paragraph is a \\lang command, that
1134     is the paragraph's langauge; otherwise, the paragraph's language is the
1135     document's language."""
1136
1137     lines = document.body
1138
1139     first_nonempty_line = \
1140         find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
1141
1142     words = lines[first_nonempty_line].split()
1143
1144     if len(words) > 1 and words[0] == "\\lang":
1145         return words[1]
1146     else:
1147         return document.language
1148
1149 def normalize_font_whitespace(document, char_properties):
1150     """ Before format 259 the font changes were ignored if a
1151     whitespace was the first or last character in the sequence, this function
1152     transfers the whitespace outside. Only a change in one of the properties
1153     in the provided     char_properties is handled by this function."""
1154
1155     if document.backend != "latex":
1156         return
1157
1158     lines = document.body
1159
1160     changes = {}
1161
1162     i = 0
1163     while i < len(lines):
1164         words = lines[i].split()
1165
1166         if len(words) > 0 and words[0] == "\\begin_layout":
1167             # a new paragraph resets all font changes
1168             changes.clear()
1169             # also reset the default language to be the paragraph's language
1170             if "\\lang" in char_properties.keys():
1171                 char_properties["\\lang"] = \
1172                     get_paragraph_language(document, i + 1)
1173
1174         elif len(words) > 1 and words[0] in char_properties.keys():
1175             # we have a font change
1176             if char_properties[words[0]] == words[1]:
1177                 # property gets reset
1178                 if words[0] in changes.keys():
1179                     del changes[words[0]]
1180                 defaultproperty = True
1181             else:
1182                 # property gets set
1183                 changes[words[0]] = words[1]
1184                 defaultproperty = False
1185
1186             # We need to explicitly reset all changed properties if we find
1187             # a space below, because LyX 1.4 would output the space after
1188             # closing the previous change and before starting the new one,
1189             # and closing a font change means to close all properties, not
1190             # just the changed one.
1191
1192             if lines[i-1] and lines[i-1][-1] == " ":
1193                 lines[i-1] = lines[i-1][:-1]
1194                 # a space before the font change
1195                 added_lines = [" "]
1196                 for k in changes.keys():
1197                     # exclude property k because that is already in lines[i]
1198                     if k != words[0]:
1199                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1200                 for k in changes.keys():
1201                     # exclude property k because that must be added below anyway
1202                     if k != words[0]:
1203                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1204                 if defaultproperty:
1205                     # Property is reset in lines[i], so add the new stuff afterwards
1206                     lines[i+1:i+1] = added_lines
1207                 else:
1208                     # Reset property for the space
1209                     added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1210                     lines[i:i] = added_lines
1211                 i = i + len(added_lines)
1212
1213             elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1214                 # a space after the font change
1215                 if (lines[i+1] == " " and lines[i+2]):
1216                     next_words = lines[i+2].split()
1217                     if len(next_words) > 0 and next_words[0] == words[0]:
1218                         # a single blank with a property different from the
1219                         # previous and the next line must not be changed
1220                         i = i + 2
1221                         continue
1222                 lines[i+1] = lines[i+1][1:]
1223                 added_lines = [" "]
1224                 for k in changes.keys():
1225                     # exclude property k because that is already in lines[i]
1226                     if k != words[0]:
1227                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1228                 for k in changes.keys():
1229                     # exclude property k because that must be added below anyway
1230                     if k != words[0]:
1231                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1232                 # Reset property for the space
1233                 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1234                 lines[i:i] = added_lines
1235                 i = i + len(added_lines)
1236
1237         i = i + 1
1238
1239
1240 def revert_utf8x(document):
1241     " Set utf8x encoding to utf8. "
1242     i = find_token(document.header, "\\inputencoding", 0)
1243     if i == -1:
1244         document.header.append("\\inputencoding auto")
1245     else:
1246         inputenc = get_value(document.header, "\\inputencoding", i)
1247         if inputenc == "utf8x":
1248             document.header[i] = "\\inputencoding utf8"
1249     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1250
1251
1252 def revert_utf8plain(document):
1253     " Set utf8plain encoding to utf8. "
1254     i = find_token(document.header, "\\inputencoding", 0)
1255     if i == -1:
1256         document.header.append("\\inputencoding auto")
1257     else:
1258         inputenc = get_value(document.header, "\\inputencoding", i)
1259         if inputenc == "utf8-plain":
1260             document.header[i] = "\\inputencoding utf8"
1261     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1262
1263
1264 def revert_beamer_alert(document):
1265     " Revert beamer's \\alert inset back to ERT. "
1266     i = 0
1267     while 1:
1268         i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1269         if i == -1:
1270             return
1271         document.body[i] = "\\begin_inset ERT"
1272         i = i + 1
1273         while 1:
1274             if (document.body[i][:13] == "\\begin_layout"):
1275                 # Insert the \alert command
1276                 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
1277                 break
1278             i = i + 1
1279
1280         i = i + 1
1281
1282
1283 def revert_beamer_structure(document):
1284     " Revert beamer's \\structure inset back to ERT. "
1285     i = 0
1286     while 1:
1287         i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1288         if i == -1:
1289             return
1290         document.body[i] = "\\begin_inset ERT"
1291         i = i + 1
1292         while 1:
1293             if (document.body[i][:13] == "\\begin_layout"):
1294                 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
1295                 break
1296             i = i + 1
1297
1298         i = i + 1
1299
1300
1301 def convert_changes(document):
1302     " Switch output_changes off if tracking_changes is off. "
1303     i = find_token(document.header, '\\tracking_changes', 0)
1304     if i == -1:
1305         document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
1306         return
1307     j = find_token(document.header, '\\output_changes', 0)
1308     if j == -1:
1309         document.warning("Malformed lyx document: Missing '\\output_changes'.")
1310         return
1311     tracking_changes = get_value(document.header, "\\tracking_changes", i)
1312     output_changes = get_value(document.header, "\\output_changes", j)
1313     if tracking_changes == "false" and output_changes == "true":
1314         document.header[j] = "\\output_changes false"
1315
1316
1317 def revert_ascii(document):
1318     " Set ascii encoding to auto. "
1319     i = find_token(document.header, "\\inputencoding", 0)
1320     if i == -1:
1321         document.header.append("\\inputencoding auto")
1322     else:
1323         inputenc = get_value(document.header, "\\inputencoding", i)
1324         if inputenc == "ascii":
1325             document.header[i] = "\\inputencoding auto"
1326     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1327
1328
1329 def normalize_language_name(document):
1330     lang = { "brazil": "brazilian",
1331              "portuges": "portuguese"}
1332
1333     if document.language in lang:
1334         document.language = lang[document.language]
1335         i = find_token(document.header, "\\language", 0)
1336         document.header[i] = "\\language %s" % document.language
1337
1338
1339 def revert_language_name(document):
1340     lang = { "brazilian": "brazil",
1341              "portuguese": "portuges"}
1342
1343     if document.language in lang:
1344         document.language = lang[document.language]
1345         i = find_token(document.header, "\\language", 0)
1346         document.header[i] = "\\language %s" % document.language
1347
1348 #
1349 #  \textclass cv -> \textclass simplecv
1350 def convert_cv_textclass(document):
1351     if document.textclass == "cv":
1352         document.textclass = "simplecv"
1353
1354
1355 def revert_cv_textclass(document):
1356     if document.textclass == "simplecv":
1357         document.textclass = "cv"
1358
1359
1360 #
1361 # add scaleBeforeRotation graphics param
1362 def convert_graphics_rotation(document):
1363     " add scaleBeforeRotation graphics parameter. "
1364     i = 0
1365     while 1:
1366         i = find_token(document.body, "\\begin_inset Graphics", i)
1367         if i == -1:
1368             return
1369         j = find_end_of_inset(document.body, i+1)
1370         if j == -1:
1371             # should not happen
1372             document.warning("Malformed LyX document: Could not find end of graphics inset.")
1373         # Seach for rotateAngle and width or height or scale
1374         # If these params are not there, nothing needs to be done.
1375         # FIXME: this also inserts scaleBeforeRotation if "rotateAngle" is not there!
1376         for k in range(i+1, j):
1377             if (document.body[k].find("rotateAngle") and \
1378                 (document.body[k].find("width") or \
1379                 document.body[k].find("height") or \
1380                 document.body[k].find("scale"))):
1381                         document.body.insert(j, 'scaleBeforeRotation')
1382         i = i + 1
1383
1384
1385 # FIXME: does not work at all
1386 def revert_graphics_rotation(document):
1387     " remove scaleBeforeRotation graphics parameter. "
1388     i = 0
1389     while 1:
1390         i = find_token(document.body, "\\begin_inset Graphics", i)
1391         if i == -1:
1392             return
1393         j = find_end_of_inset(document.body, i + 1)
1394         if j == -1:
1395             # should not happen
1396             document.warning("Malformed LyX document: Could not find end of graphics inset.")
1397         for k in range(i+1, j):
1398             # If there's a scaleBeforeRotation param, just remove that
1399             if document.body[k].find('scaleBeforeRotation'):
1400                 del document.body[k]
1401                 break
1402             # if not, and if we have rotateAngle and width or height or scale,
1403             # we have to put the rotateAngle value to special
1404             rotateAngle = get_value(document.body, 'rotateAngle', i+1, j)
1405             special = get_value(document.body, 'special', i+1, j)
1406             if (document.body[k].find("width") or \
1407                 document.body[k].find("height") or \
1408                 document.body[k].find("scale") and \
1409                 document.body[k].find("rotateAngle")):
1410                     if special == "":
1411                         document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
1412                     else:
1413                         l = find_token(document.body, "special", i+1, j)
1414                         document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1415         i = i + 1
1416
1417
1418
1419 def convert_tableborder(document):
1420     # The problematic is: LyX double the table cell border as it ignores the "|" character in
1421     # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1422     i = 0
1423     while i < len(document.body):
1424         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1425         k = document.body[i].find("|>{", 0, len(document.body[i]))
1426         # the two tokens have to be in one line
1427         if (h != -1 and k != -1):
1428             # delete the "|"
1429             document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
1430         i = i + 1
1431
1432
1433 def revert_tableborder(document):
1434     i = 0
1435     while i < len(document.body):
1436         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1437         k = document.body[i].find(">{", 0, len(document.body[i]))
1438         # the two tokens have to be in one line
1439         if (h != -1 and k != -1):
1440             # add the "|"
1441             document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
1442         i = i + 1
1443
1444
1445 def revert_armenian(document):
1446
1447     # set inputencoding from armscii8 to auto
1448     if document.inputencoding == "armscii8":
1449         i = find_token(document.header, "\\inputencoding", 0)
1450         if i != -1:
1451             document.header[i] = "\\inputencoding auto"
1452     # check if preamble exists, if not k is set to -1
1453     i = 0
1454     k = -1
1455     while i < len(document.preamble):
1456         if k == -1:
1457             k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1458         if k == -1:
1459             k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1460         i = i + 1
1461     # add the entry \usepackage{armtex} to the document preamble
1462     if document.language == "armenian":
1463         # set the armtex entry as the first preamble line
1464         if k != -1:
1465             document.preamble[0:0] = ["\\usepackage{armtex}"]
1466         # create the preamble when it doesn't exist
1467         else:
1468             document.preamble.append('\\usepackage{armtex}')
1469     # Set document language from armenian to english
1470     if document.language == "armenian":
1471         document.language = "english"
1472         i = find_token(document.header, "\\language", 0)
1473         if i != -1:
1474             document.header[i] = "\\language english"
1475
1476
1477 def revert_CJK(document):
1478     " Set CJK encodings to default and languages chinese, japanese and korean to english. "
1479     encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
1480                  "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
1481     i = find_token(document.header, "\\inputencoding", 0)
1482     if i == -1:
1483         document.header.append("\\inputencoding auto")
1484     else:
1485         inputenc = get_value(document.header, "\\inputencoding", i)
1486         if inputenc in encodings:
1487             document.header[i] = "\\inputencoding default"
1488     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1489
1490     if document.language == "chinese-simplified" or \
1491        document.language == "chinese-traditional" or \
1492        document.language == "japanese" or document.language == "korean":
1493         document.language = "english"
1494         i = find_token(document.header, "\\language", 0)
1495         if i != -1:
1496             document.header[i] = "\\language english"
1497
1498
1499 def revert_preamble_listings_params(document):
1500     " Revert preamble option \listings_params "
1501     i = find_token(document.header, "\\listings_params", 0)
1502     if i != -1:
1503         document.preamble.append('\\usepackage{listings}')
1504         document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
1505         document.header.pop(i);
1506
1507
1508 def revert_listings_inset(document):
1509     r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1510 FROM
1511
1512 \begin_inset
1513 lstparams "language=Delphi"
1514 inline true
1515 status open
1516
1517 \begin_layout Standard
1518 var i = 10;
1519 \end_layout
1520
1521 \end_inset
1522
1523 TO
1524
1525 \begin_inset ERT
1526 status open
1527 \begin_layout Standard
1528
1529
1530 \backslash
1531 lstinline[language=Delphi]{var i = 10;}
1532 \end_layout
1533
1534 \end_inset
1535
1536 There can be an caption inset in this inset
1537
1538 \begin_layout Standard
1539 \begin_inset Caption
1540
1541 \begin_layout Standard
1542 before label
1543 \begin_inset LatexCommand label
1544 name "lst:caption"
1545
1546 \end_inset
1547
1548 after label
1549 \end_layout
1550
1551 \end_inset
1552
1553
1554 \end_layout
1555
1556 '''
1557     i = 0
1558     while True:
1559         i = find_token(document.body, '\\begin_inset listings', i)
1560         if i == -1:
1561             break
1562         else:
1563             if not '\\usepackage{listings}' in document.preamble:
1564                 document.preamble.append('\\usepackage{listings}')
1565         j = find_end_of_inset(document.body, i + 1)
1566         if j == -1:
1567             # this should not happen
1568             break
1569         inline = 'false'
1570         params = ''
1571         status = 'open'
1572         # first three lines
1573         for line in range(i + 1, i + 4):
1574             if document.body[line].startswith('inline'):
1575                 inline = document.body[line].split()[1]
1576             if document.body[line].startswith('lstparams'):
1577                 params = document.body[line].split()[1].strip('"')
1578             if document.body[line].startswith('status'):
1579                 status = document.body[line].split()[1].strip()
1580                 k = line + 1
1581         # caption?
1582         caption = ''
1583         label = ''
1584         cap = find_token(document.body, '\\begin_inset Caption', i)
1585         if cap != -1:
1586             cap_end = find_end_of_inset(document.body, cap + 1)
1587             if cap_end == -1:
1588                 # this should not happen
1589                 break
1590             # label?
1591             lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1592             if lbl != -1:
1593                 lbl_end = find_end_of_inset(document.body, lbl + 1)
1594                 if lbl_end == -1:
1595                     # this should not happen
1596                     break
1597             else:
1598                 lbl = cap_end
1599                 lbl_end = cap_end
1600             for line in document.body[lbl : lbl_end + 1]:
1601                 if line.startswith('name '):
1602                     label = line.split()[1].strip('"')
1603                     break
1604             for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1605                 if not line.startswith('\\'):
1606                     caption += line.strip()
1607             k = cap_end + 1
1608         inlinecode = ''
1609         # looking for the oneline code for lstinline
1610         inlinecode = document.body[find_end_of_layout(document.body,
1611             find_token(document.body, '\\begin_layout Standard', i + 1) +1 ) - 1]
1612         if len(caption) > 0:
1613             if len(params) == 0:
1614                 params = 'caption={%s}' % caption
1615             else:
1616                 params += ',caption={%s}' % caption
1617         if len(label) > 0:
1618             if len(params) == 0:
1619                 params = 'label={%s}' % label
1620             else:
1621                 params += ',label={%s}' % label
1622         if len(params) > 0:
1623             params = '[%s]' % params
1624             params = params.replace('\\', '\\backslash\n')
1625         if inline == 'true':
1626             document.body[i:(j+1)] = [r'\begin_inset ERT',
1627                                       'status %s' % status,
1628                                       r'\begin_layout Standard',
1629                                       '',
1630                                       '',
1631                                       r'\backslash',
1632                                       'lstinline%s{%s}' % (params, inlinecode),
1633                                       r'\end_layout',
1634                                       '',
1635                                       r'\end_inset']
1636         else:
1637             document.body[i: j+1] =  [r'\begin_inset ERT',
1638                                       'status %s' % status,
1639                                       '',
1640                                       r'\begin_layout Standard',
1641                                       '',
1642                                       '',
1643                                       r'\backslash',
1644                                       r'begin{lstlisting}%s' % params,
1645                                       r'\end_layout'
1646                                     ] + document.body[k : j - 1] + \
1647                                      ['',
1648                                       r'\begin_layout Standard',
1649                                       '',
1650                                       r'\backslash',
1651                                       'end{lstlisting}',
1652                                       r'\end_layout',
1653                                       '',
1654                                       r'\end_inset']
1655
1656
1657 def revert_include_listings(document):
1658     r''' Revert lstinputlisting Include option , translate
1659 \begin_inset Include \lstinputlisting{file}[opt]
1660 preview false
1661
1662 \end_inset
1663
1664 TO
1665
1666 \begin_inset ERT
1667 status open
1668
1669 \begin_layout Standard
1670
1671
1672 \backslash
1673 lstinputlisting{file}[opt]
1674 \end_layout
1675
1676 \end_inset
1677     '''
1678
1679     i = 0
1680     while True:
1681         i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1682         if i == -1:
1683             break
1684         else:
1685             if not '\\usepackage{listings}' in document.preamble:
1686                 document.preamble.append('\\usepackage{listings}')
1687         j = find_end_of_inset(document.body, i + 1)
1688         if j == -1:
1689             # this should not happen
1690             break
1691         # find command line lstinputlisting{file}[options]
1692         cmd, file, option = '', '', ''
1693         if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1694             cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
1695         option = option.replace('\\', '\\backslash\n')
1696         document.body[i : j + 1] = [r'\begin_inset ERT',
1697                                     'status open',
1698                                     '',
1699                                     r'\begin_layout Standard',
1700                                     '',
1701                                     '',
1702                                     r'\backslash',
1703                                     '%s%s{%s}' % (cmd, option, file),
1704                                     r'\end_layout',
1705                                     '',
1706                                     r'\end_inset']
1707
1708
1709 def revert_ext_font_sizes(document):
1710     if document.backend != "latex": return
1711     if not document.textclass.startswith("ext"): return
1712
1713     fontsize = get_value(document.header, '\\paperfontsize', 0)
1714     if fontsize not in ('10', '11', '12'): return
1715     fontsize += 'pt'
1716
1717     i = find_token(document.header, '\\paperfontsize', 0)
1718     document.header[i] = '\\paperfontsize default'
1719
1720     i = find_token(document.header, '\\options', 0)
1721     if i == -1:
1722         i = find_token(document.header, '\\textclass', 0) + 1
1723         document.header[i:i] = ['\\options %s' % fontsize]
1724     else:
1725         document.header[i] += ',%s' % fontsize
1726
1727
1728 def convert_ext_font_sizes(document):
1729     if document.backend != "latex": return
1730     if not document.textclass.startswith("ext"): return
1731
1732     fontsize = get_value(document.header, '\\paperfontsize', 0)
1733     if fontsize != 'default': return
1734
1735     i = find_token(document.header, '\\options', 0)
1736     if i == -1: return
1737
1738     options = get_value(document.header, '\\options', i)
1739
1740     fontsizes = '10pt', '11pt', '12pt'
1741     for fs in fontsizes:
1742         if options.find(fs) != -1:
1743             break
1744     else: # this else will only be attained if the for cycle had no match
1745         return
1746
1747     options = options.split(',')
1748     for j, opt in enumerate(options):
1749         if opt in fontsizes:
1750             fontsize = opt[:-2]
1751             del options[j]
1752             break
1753     else:
1754         return
1755
1756     k = find_token(document.header, '\\paperfontsize', 0)
1757     document.header[k] = '\\paperfontsize %s' % fontsize
1758
1759     if options:
1760         document.header[i] = '\\options %s' % ','.join(options)
1761     else:
1762         del document.header[i]
1763
1764 def revert_separator_layout(document):
1765     r'''Revert --Separator-- to a lyx note
1766 From
1767
1768 \begin_layout --Separator--
1769 something
1770 \end_layout
1771
1772 to
1773
1774 \begin_layout Standard
1775 \begin_inset Note Note
1776 status open
1777
1778 \begin_layout Standard
1779 Separate Evironment
1780 \end_layout
1781
1782 \end_inset
1783 something
1784
1785 \end_layout
1786
1787     '''
1788
1789     i = 0
1790     while True:
1791         i = find_token(document.body, r'\begin_layout --Separator--', i)
1792         if i == -1:
1793             break
1794         j = find_end_of_layout(document.body, i + 1)
1795         if j == -1:
1796             # this should not happen
1797             break
1798         document.body[i : j + 1] = [r'\begin_layout Standard',
1799                                     r'\begin_inset Note Note',
1800                                     'status open',
1801                                     '',
1802                                     r'\begin_layout Standard',
1803                                     'Separate Environment',
1804                                     r'\end_layout',
1805                                     '',
1806                                     r'\end_inset'] + \
1807                                     document.body[ i + 1 : j] + \
1808                                     ['',
1809                                     r'\end_layout'
1810                                     ]
1811
1812 def convert_arabic (document):
1813     if document.language == "arabic":
1814         document.language = "arabic_arabtex"
1815         i = find_token(document.header, "\\language", 0)
1816         if i != -1:
1817             document.header[i] = "\\language arabic_arabtex"
1818     i = 0
1819     while i < len(document.body):
1820         h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
1821         if (h != -1):
1822             # change the language name
1823             document.body[i] = '\lang arabic_arabtex'
1824         i = i + 1
1825
1826 def revert_arabic (document):
1827     if document.language == "arabic_arabtex":
1828         document.language = "arabic"
1829         i = find_token(document.header, "\\language", 0)
1830         if i != -1:
1831             document.header[i] = "\\language arabic"
1832     i = 0
1833     while i < len(document.body):
1834         h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
1835         if (h != -1):
1836             # change the language name
1837             document.body[i] = '\lang arabic'
1838         i = i + 1
1839
1840 def revert_unicode(document):
1841     '''Transform unicode symbols according to the unicode list.
1842 Preamble flags are not implemented.
1843 Combination characters are currently ignored.
1844 Forced output is currently not enforced'''
1845     pathname = os.path.dirname(sys.argv[0])
1846     fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
1847     spec_chars = {}
1848     for line in fp.readlines():
1849         if line[0] != '#':
1850             line=line.replace(' "',' ') # remove all qoutation marks with spaces before
1851             line=line.replace('" ',' ') # remove all qoutation marks with spaces after
1852             line=line.replace(r'\"','"') # replace \" by " (for characters with diaresis)
1853             try:
1854                 # flag1 and flag2 are preamble & flags
1855                 # currently NOT implemented
1856                 [ucs4,command,flag1,flag2] =line.split(None,3)
1857                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
1858             except:
1859                 pass
1860     fp.close()
1861     # Define strings to start and end ERT and math insets
1862     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
1863     ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
1864     math_intro='\n\\begin_inset Formula $'
1865     math_outro='$\n\\end_inset\n'
1866     # Find unicode characters and replace them
1867     in_ert = 0 # flag set to 1 if in ERT inset
1868     in_math = 0 # flag set to 1 if in math inset
1869     insets = [] # list of active insets
1870     for i, current_line in enumerate(document.body):
1871         if current_line.find('\\begin_inset') > -1:
1872             # check which inset to start
1873             if current_line.find('\\begin_inset ERT') > -1:
1874                 in_ert = 1
1875                 insets.append('ert')
1876             elif current_line.find('\\begin_inset Formula') > -1:
1877                 in_math = 1
1878                 insets.append('math')
1879             else:
1880                 insets.append('other')
1881         if current_line.find('\\end_inset') > -1:
1882             # check which inset to end
1883             try:
1884                 cur_inset = insets.pop()
1885                 if cur_inset == 'ert':
1886                     in_ert = 0
1887                 elif cur_inset == 'math':
1888                     in_math = 0
1889                 else:
1890                     pass # end of other inset
1891             except:
1892                 pass # inset list was empty (for some reason)
1893         current_line=''; # clear to have as container for modified line
1894         for j in range(len(document.body[i])):
1895             if spec_chars.has_key(document.body[i][j]):
1896                 flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
1897                 if flags.find('combining') > -1:
1898                     command = ''
1899                 else:
1900                     command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
1901                     if command[0:2] == '\\\\':
1902                         if command[2:12]=='ensuremath':
1903                             if in_ert == 1:
1904                                 # math in ERT
1905                                 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
1906                                 command = command.replace('}', '$\n')
1907                             elif in_math == 0:
1908                                 # add a math inset with the replacement character
1909                                 command = command.replace('\\\\ensuremath{\\', math_intro)
1910                                 command = command.replace('}', math_outro)
1911                             else:
1912                                 # we are already in a math inset
1913                                 command = command.replace('\\\\ensuremath{\\', '')
1914                                 command = command.replace('}', '')
1915                         else:
1916                             if in_math == 1:
1917                                 # avoid putting an ERT in a math; instead put command as text
1918                                 command = command.replace('\\\\', '\mathrm{')
1919                                 command = command + '}'
1920                             elif in_ert == 0:
1921                                 # add an ERT inset with the replacement character
1922                                 command = command.replace('\\\\', ert_intro)
1923                                 command = command + ert_outro
1924                             else:
1925                                 command = command.replace('\\\\', '\n\\backslash\n')
1926                 current_line = current_line + command
1927             else:
1928                 current_line = current_line + document.body[i][j]
1929         document.body[i] = current_line
1930
1931
1932 ##
1933 # Conversion hub
1934 #
1935
1936 supported_versions = ["1.5.0","1.5"]
1937 convert = [[246, []],
1938            [247, [convert_font_settings]],
1939            [248, []],
1940            [249, [convert_utf8]],
1941            [250, []],
1942            [251, []],
1943            [252, [convert_commandparams, convert_bibitem]],
1944            [253, []],
1945            [254, [convert_esint]],
1946            [255, []],
1947            [256, []],
1948            [257, [convert_caption]],
1949            [258, [convert_lyxline]],
1950            [259, [convert_accent, normalize_font_whitespace_259]],
1951            [260, []],
1952            [261, [convert_changes]],
1953            [262, []],
1954            [263, [normalize_language_name]],
1955            [264, [convert_cv_textclass]],
1956            [265, [convert_tableborder]],
1957            [266, []],
1958            [267, []],
1959            [268, []],
1960            [269, []],
1961            [270, []],
1962            [271, [convert_ext_font_sizes]],
1963            [272, []],
1964            [273, []],
1965            [274, [normalize_font_whitespace_274]],
1966            [275, [convert_graphics_rotation]],
1967            [276, [convert_arabic]]
1968           ]
1969
1970 revert =  [
1971            [275, [revert_arabic]],
1972            [274, [revert_graphics_rotation]],
1973            [273, []],
1974            [272, [revert_separator_layout]],
1975            [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
1976            [270, [revert_ext_font_sizes]],
1977            [269, [revert_beamer_alert, revert_beamer_structure]],
1978            [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
1979            [267, [revert_CJK]],
1980            [266, [revert_utf8plain]],
1981            [265, [revert_armenian]],
1982            [264, [revert_tableborder]],
1983            [263, [revert_cv_textclass]],
1984            [262, [revert_language_name]],
1985            [261, [revert_ascii]],
1986            [260, []],
1987            [259, [revert_utf8x]],
1988            [258, []],
1989            [257, []],
1990            [256, [revert_caption]],
1991            [255, [revert_encodings]],
1992            [254, [revert_clearpage, revert_cleardoublepage]],
1993            [253, [revert_esint]],
1994            [252, [revert_nomenclature, revert_printnomenclature]],
1995            [251, [revert_commandparams]],
1996            [250, [revert_cs_label]],
1997            [249, []],
1998            [248, [revert_accent, revert_utf8, revert_unicode]],
1999            [247, [revert_booktabs]],
2000            [246, [revert_font_settings]],
2001            [245, [revert_framed]]]
2002
2003
2004 if __name__ == "__main__":
2005     pass
2006
2007