lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     inset_types = ["Foot", "Note"]
 250     if document.cjk_encoding != '':
 251         return
 252     encoding_stack = [document.encoding]
 253     insets = []
 254     lang_re = re.compile(r"^\\lang\s(\S+)")
 255     inset_re = re.compile(r"^\\begin_inset\s(\S+)")
 256     if document.inputencoding == "auto" or document.inputencoding == "default":
 257         for i in range(len(document.body)):
 258             result = lang_re.match(document.body[i])
 259             if result:
 260                 language = result.group(1)
 261                 if language == "default":
 262                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 263                     encoding_stack[-1] = document.encoding
 264                 else:
 265                     from lyx2lyx_lang import lang
 266                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 267                     encoding_stack[-1] = lang[language][3]
 268             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 269                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 270                 if len(insets) > 0 and insets[-1] in inset_types:
 271                     from lyx2lyx_lang import lang
 272                     encoding_stack.append(lang[document.language][3])
 273                 else:
 274                     encoding_stack.append(encoding_stack[-1])
 275             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 276                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 277                 if len(encoding_stack) == 1:
 278                     # Don't remove the document encoding from the stack
 279                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 280                 else:
 281                     del encoding_stack[-1]
 282             elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
 283                 inset_result = inset_re.match(document.body[i])
 284                 if inset_result:
 285                     insets.append(inset_result.group(1))
 286                 else:
 287                     insets.append("")
 288             elif find_token(document.body, "\\end_inset", i, i + 1) == i:
 289                 del insets[-1]
 290             if encoding_stack[-1] != document.encoding:
 291                 if forward:
 292                     # This line has been incorrectly interpreted as if it was
 293                     # encoded in 'encoding'.
 294                     # Convert back to the 8bit string that was in the file.
 295                     orig = document.body[i].encode(document.encoding)
 296                     # Convert the 8bit string that was in the file to unicode
 297                     # with the correct encoding.
 298                     document.body[i] = orig.decode(encoding_stack[-1])
 299                 else:
 300                     # Convert unicode to the 8bit string that will be written
 301                     # to the file with the correct encoding.
 302                     orig = document.body[i].encode(encoding_stack[-1])
 303                     # Convert the 8bit string that will be written to the
 304                     # file to fake unicode with the encoding that will later
 305                     # be used when writing to the file.
 306                     document.body[i] = orig.decode(document.encoding)
 307
 308
 309 def convert_utf8(document):
 310     " Set document encoding to UTF-8. "
 311     convert_multiencoding(document, True)
 312     document.encoding = "utf8"
 313
 314
 315 def revert_utf8(document):
 316     " Set document encoding to the value corresponding to inputencoding. "
 317     i = find_token(document.header, "\\inputencoding", 0)
 318     if i == -1:
 319         document.header.append("\\inputencoding auto")
 320     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 321         document.header[i] = "\\inputencoding auto"
 322     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 323     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 324     convert_multiencoding(document, False)
 325
 326
 327 def read_unicodesymbols():
 328     " Read the unicodesymbols list of unicode characters and corresponding commands."
 329     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 330     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
 331     spec_chars = {}
 332     for line in fp.readlines():
 333         if line[0] != '#':
 334             line=line.replace(' "',' ') # remove all quotation marks with spaces before
 335             line=line.replace('" ',' ') # remove all quotation marks with spaces after
 336             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
 337             try:
 338                 # flag1 and flag2 are preamble and other flags
 339                 [ucs4,command,flag1,flag2] =line.split(None,3)
 340                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
 341             except:
 342                 pass
 343     fp.close()
 344
 345     return spec_chars
 346
 347
 348 def revert_unicode(document):
 349     '''Transform unicode characters that can not be written using the
 350 document encoding to commands according to the unicodesymbols
 351 file. Characters that can not be replaced by commands are replaced by
 352 an replacement string.  Flags other than 'combined' are currently not
 353 implemented.'''
 354
 355     replacement_character = '???'
 356     spec_chars = read_unicodesymbols()
 357
 358     # Define strings to start and end ERT and math insets
 359     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
 360     ert_outro='\n\\end_layout\n\n\\end_inset\n'
 361     math_intro='\n\\begin_inset Formula $'
 362     math_outro='$\n\\end_inset'
 363     # Find unicode characters and replace them
 364     in_ert = False # flag set to 1 if in ERT inset
 365     in_math = False # flag set to 1 if in math inset
 366     insets = [] # list of active insets
 367
 368     # Go through the file to capture all combining characters
 369     last_char = '' # to store the previous character
 370
 371     i = 0
 372     while i < len(document.body):
 373         line = document.body[i]
 374         # Check for insets
 375         if line.find('\\begin_inset') > -1:
 376             insets.append(line[13:].split()[0])
 377         if line.find('\\end_inset') > -1:
 378             del insets[-1]
 379
 380         # Try to write the line
 381         try:
 382             # If all goes well the line is written here
 383             dummy = line.encode(document.encoding)
 384             last_char = line[-1]
 385             i += 1
 386         except:
 387             # Error, some character(s) in the line need to be replaced
 388             mod_line = u''
 389             for character in line:
 390                 try:
 391                     # Try to write the character
 392                     dummy = character.encode(document.encoding)
 393                     mod_line += character
 394                     last_char = character
 395                 except:
 396                     # Try to replace with ERT/math inset
 397                     if spec_chars.has_key(character):
 398                         command = spec_chars[character][0] # the command to replace unicode
 399                         flag1 = spec_chars[character][1]
 400                         flag2 = spec_chars[character][2]
 401                         if flag1.find('combining') > -1 or flag2.find('combining') > -1:
 402                             # We have a character that should be combined with the previous
 403                             command += '{' + last_char + '}'
 404                             # Remove the last character. Ignore if it is whitespace
 405                             if len(last_char.rstrip()):
 406                                 # last_char was found and is not whitespace
 407                                 if mod_line:
 408                                     mod_line = mod_line[:-1]
 409                                 else: # last_char belongs to the last line
 410                                     document.body[i-1] = document.body[i-1][:-1]
 411                             else:
 412                                 # The last character was replaced by a command. For now it is
 413                                 # ignored. This could be handled better.
 414                                 pass
 415                         if command[0:2] == '\\\\':
 416                             if command[2:12]=='ensuremath':
 417                                 if insets[-1] == "ERT":
 418                                     # math in ERT
 419                                     command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
 420                                     command = command.replace('}', '$\n')
 421                                 elif insets[-1] != "Formula":
 422                                     # add a math inset with the replacement character
 423                                     command = command.replace('\\\\ensuremath{\\', math_intro)
 424                                     command = command.replace('}', math_outro)
 425                                 else:
 426                                     # we are already in a math inset
 427                                     command = command.replace('\\\\ensuremath{\\', '')
 428                                     command = command.replace('}', '')
 429                             else:
 430                                 if insets[-1] == "Formula":
 431                                     # avoid putting an ERT in a math; instead put command as text
 432                                     command = command.replace('\\\\', '\mathrm{')
 433                                     command = command + '}'
 434                                 elif insets[-1] != "ERT":
 435                                     # add an ERT inset with the replacement character
 436                                     command = command.replace('\\\\', ert_intro)
 437                                     command = command + ert_outro
 438                                 else:
 439                                     command = command.replace('\\\\', '\n\\backslash\n')
 440                             last_char = '' # indicate that the character should not be removed
 441                         mod_line += command
 442                     else:
 443                         # Replace with replacement string
 444                         mod_line += replacement_character
 445             document.body[i:i+1] = mod_line.split('\n')
 446             i += len(mod_line.split('\n'))
 447
 448
 449 def revert_cs_label(document):
 450     " Remove status flag of charstyle label. "
 451     i = 0
 452     while 1:
 453         i = find_token(document.body, "\\begin_inset CharStyle", i)
 454         if i == -1:
 455             return
 456         # Seach for a line starting 'show_label'
 457         # If it is not there, break with a warning message
 458         i = i + 1
 459         while 1:
 460             if (document.body[i][:10] == "show_label"):
 461                 del document.body[i]
 462                 break
 463             elif (document.body[i][:13] == "\\begin_layout"):
 464                 document.warning("Malformed LyX document: Missing 'show_label'.")
 465                 break
 466             i = i + 1
 467
 468         i = i + 1
 469
 470
 471 def convert_bibitem(document):
 472     """ Convert
 473 \bibitem [option]{argument}
 474
 475 to
 476
 477 \begin_inset LatexCommand bibitem
 478 label "option"
 479 key "argument"
 480
 481 \end_inset
 482
 483 This must be called after convert_commandparams.
 484 """
 485     i = 0
 486     while 1:
 487         i = find_token(document.body, "\\bibitem", i)
 488         if i == -1:
 489             break
 490         j = document.body[i].find('[') + 1
 491         k = document.body[i].rfind(']')
 492         if j == 0: # No optional argument found
 493             option = None
 494         else:
 495             option = document.body[i][j:k]
 496         j = document.body[i].rfind('{') + 1
 497         k = document.body[i].rfind('}')
 498         argument = document.body[i][j:k]
 499         lines = ['\\begin_inset LatexCommand bibitem']
 500         if option != None:
 501             lines.append('label "%s"' % option.replace('"', '\\"'))
 502         lines.append('key "%s"' % argument.replace('"', '\\"'))
 503         lines.append('')
 504         lines.append('\\end_inset')
 505         document.body[i:i+1] = lines
 506         i = i + 1
 507
 508
 509 commandparams_info = {
 510     # command : [option1, option2, argument]
 511     "bibitem" : ["label", "", "key"],
 512     "bibtex" : ["options", "btprint", "bibfiles"],
 513     "cite"        : ["after", "before", "key"],
 514     "citet"       : ["after", "before", "key"],
 515     "citep"       : ["after", "before", "key"],
 516     "citealt"     : ["after", "before", "key"],
 517     "citealp"     : ["after", "before", "key"],
 518     "citeauthor"  : ["after", "before", "key"],
 519     "citeyear"    : ["after", "before", "key"],
 520     "citeyearpar" : ["after", "before", "key"],
 521     "citet*"      : ["after", "before", "key"],
 522     "citep*"      : ["after", "before", "key"],
 523     "citealt*"    : ["after", "before", "key"],
 524     "citealp*"    : ["after", "before", "key"],
 525     "citeauthor*" : ["after", "before", "key"],
 526     "Citet"       : ["after", "before", "key"],
 527     "Citep"       : ["after", "before", "key"],
 528     "Citealt"     : ["after", "before", "key"],
 529     "Citealp"     : ["after", "before", "key"],
 530     "Citeauthor"  : ["after", "before", "key"],
 531     "Citet*"      : ["after", "before", "key"],
 532     "Citep*"      : ["after", "before", "key"],
 533     "Citealt*"    : ["after", "before", "key"],
 534     "Citealp*"    : ["after", "before", "key"],
 535     "Citeauthor*" : ["after", "before", "key"],
 536     "citefield"   : ["after", "before", "key"],
 537     "citetitle"   : ["after", "before", "key"],
 538     "cite*"       : ["after", "before", "key"],
 539     "hfill" : ["", "", ""],
 540     "index"      : ["", "", "name"],
 541     "printindex" : ["", "", "name"],
 542     "label" : ["", "", "name"],
 543     "eqref"     : ["name", "", "reference"],
 544     "pageref"   : ["name", "", "reference"],
 545     "prettyref" : ["name", "", "reference"],
 546     "ref"       : ["name", "", "reference"],
 547     "vpageref"  : ["name", "", "reference"],
 548     "vref"      : ["name", "", "reference"],
 549     "tableofcontents" : ["", "", "type"],
 550     "htmlurl" : ["name", "", "target"],
 551     "url"     : ["name", "", "target"]}
 552
 553
 554 def convert_commandparams(document):
 555     """ Convert
 556
 557  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 558  \end_inset
 559
 560  to
 561
 562  \begin_inset LatexCommand cmdname
 563  name1 "opt1"
 564  name2 "opt2"
 565  name3 "arg"
 566  \end_inset
 567
 568  name1, name2 and name3 can be different for each command.
 569 """
 570     # \begin_inset LatexCommand bibitem was not the official version (see
 571     # convert_bibitem()), but could be read in, so we convert it here, too.
 572
 573     i = 0
 574     while 1:
 575         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 576         if i == -1:
 577             break
 578         command = document.body[i][26:].strip()
 579         if command == "":
 580             document.warning("Malformed LyX document: Missing LatexCommand name.")
 581             i = i + 1
 582             continue
 583
 584         j = find_token(document.body, "\\end_inset", i + 1)
 585         if j == -1:
 586             document.warning("Malformed document")
 587         else:
 588             command += "".join(document.body[i+1:j])
 589             document.body[i+1:j] = []
 590
 591         # The following parser is taken from the original InsetCommandParams::scanCommand
 592         name = ""
 593         option1 = ""
 594         option2 = ""
 595         argument = ""
 596         state = "WS"
 597         # Used to handle things like \command[foo[bar]]{foo{bar}}
 598         nestdepth = 0
 599         b = 0
 600         for c in command:
 601             if ((state == "CMDNAME" and c == ' ') or
 602                 (state == "CMDNAME" and c == '[') or
 603                 (state == "CMDNAME" and c == '{')):
 604                 state = "WS"
 605             if ((state == "OPTION" and c == ']') or
 606                 (state == "SECOPTION" and c == ']') or
 607                 (state == "CONTENT" and c == '}')):
 608                 if nestdepth == 0:
 609                     state = "WS"
 610                 else:
 611                     nestdepth = nestdepth - 1
 612             if ((state == "OPTION" and c == '[') or
 613                 (state == "SECOPTION" and c == '[') or
 614                 (state == "CONTENT" and c == '{')):
 615                 nestdepth = nestdepth + 1
 616             if state == "CMDNAME":
 617                     name += c
 618             elif state == "OPTION":
 619                     option1 += c
 620             elif state == "SECOPTION":
 621                     option2 += c
 622             elif state == "CONTENT":
 623                     argument += c
 624             elif state == "WS":
 625                 if c == '\\':
 626                     state = "CMDNAME"
 627                 elif c == '[' and b != ']':
 628                     state = "OPTION"
 629                     nestdepth = 0 # Just to be sure
 630                 elif c == '[' and b == ']':
 631                     state = "SECOPTION"
 632                     nestdepth = 0 # Just to be sure
 633                 elif c == '{':
 634                     state = "CONTENT"
 635                     nestdepth = 0 # Just to be sure
 636             b = c
 637
 638         # Now we have parsed the command, output the parameters
 639         lines = ["\\begin_inset LatexCommand %s" % name]
 640         if option1 != "":
 641             if commandparams_info[name][0] == "":
 642                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 643             else:
 644                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 645         if option2 != "":
 646             if commandparams_info[name][1] == "":
 647                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 648             else:
 649                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 650         if argument != "":
 651             if commandparams_info[name][2] == "":
 652                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 653             else:
 654                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 655         document.body[i:i+1] = lines
 656         i = i + 1
 657
 658
 659 def revert_commandparams(document):
 660     regex = re.compile(r'(\S+)\s+(.+)')
 661     i = 0
 662     while 1:
 663         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 664         if i == -1:
 665             break
 666         name = document.body[i].split()[2]
 667         j = find_end_of_inset(document.body, i + 1)
 668         preview_line = ""
 669         option1 = ""
 670         option2 = ""
 671         argument = ""
 672         for k in range(i + 1, j):
 673             match = re.match(regex, document.body[k])
 674             if match:
 675                 pname = match.group(1)
 676                 pvalue = match.group(2)
 677                 if pname == "preview":
 678                     preview_line = document.body[k]
 679                 elif (commandparams_info[name][0] != "" and
 680                       pname == commandparams_info[name][0]):
 681                     option1 = pvalue.strip('"').replace('\\"', '"')
 682                 elif (commandparams_info[name][1] != "" and
 683                       pname == commandparams_info[name][1]):
 684                     option2 = pvalue.strip('"').replace('\\"', '"')
 685                 elif (commandparams_info[name][2] != "" and
 686                       pname == commandparams_info[name][2]):
 687                     argument = pvalue.strip('"').replace('\\"', '"')
 688             elif document.body[k].strip() != "":
 689                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 690         if name == "bibitem":
 691             if option1 == "":
 692                 lines = ["\\bibitem {%s}" % argument]
 693             else:
 694                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 695         else:
 696             if option1 == "":
 697                 if option2 == "":
 698                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 699                 else:
 700                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 701             else:
 702                 if option2 == "":
 703                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 704                 else:
 705                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 706         if name != "bibitem":
 707             if preview_line != "":
 708                 lines.append(preview_line)
 709             lines.append('')
 710             lines.append('\\end_inset')
 711         document.body[i:j+1] = lines
 712         i = j + 1
 713
 714
 715 def revert_nomenclature(document):
 716     " Convert nomenclature entry to ERT. "
 717     regex = re.compile(r'(\S+)\s+(.+)')
 718     i = 0
 719     use_nomencl = 0
 720     while 1:
 721         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 722         if i == -1:
 723             break
 724         use_nomencl = 1
 725         j = find_end_of_inset(document.body, i + 1)
 726         preview_line = ""
 727         symbol = ""
 728         description = ""
 729         prefix = ""
 730         for k in range(i + 1, j):
 731             match = re.match(regex, document.body[k])
 732             if match:
 733                 name = match.group(1)
 734                 value = match.group(2)
 735                 if name == "preview":
 736                     preview_line = document.body[k]
 737                 elif name == "symbol":
 738                     symbol = value.strip('"').replace('\\"', '"')
 739                 elif name == "description":
 740                     description = value.strip('"').replace('\\"', '"')
 741                 elif name == "prefix":
 742                     prefix = value.strip('"').replace('\\"', '"')
 743             elif document.body[k].strip() != "":
 744                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 745         if prefix == "":
 746             command = 'nomenclature{%s}{%s}' % (symbol, description)
 747         else:
 748             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 749         document.body[i:j+1] = ['\\begin_inset ERT',
 750                                 'status collapsed',
 751                                 '',
 752                                 '\\begin_layout %s' % document.default_layout,
 753                                 '',
 754                                 '',
 755                                 '\\backslash',
 756                                 command,
 757                                 '\\end_layout',
 758                                 '',
 759                                 '\\end_inset']
 760         i = i + 11
 761     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 762         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 763         document.preamble.append('\\makenomenclature')
 764
 765
 766 def revert_printnomenclature(document):
 767     " Convert printnomenclature to ERT. "
 768     regex = re.compile(r'(\S+)\s+(.+)')
 769     i = 0
 770     use_nomencl = 0
 771     while 1:
 772         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 773         if i == -1:
 774             break
 775         use_nomencl = 1
 776         j = find_end_of_inset(document.body, i + 1)
 777         preview_line = ""
 778         labelwidth = ""
 779         for k in range(i + 1, j):
 780             match = re.match(regex, document.body[k])
 781             if match:
 782                 name = match.group(1)
 783                 value = match.group(2)
 784                 if name == "preview":
 785                     preview_line = document.body[k]
 786                 elif name == "labelwidth":
 787                     labelwidth = value.strip('"').replace('\\"', '"')
 788             elif document.body[k].strip() != "":
 789                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 790         if labelwidth == "":
 791             command = 'nomenclature{}'
 792         else:
 793             command = 'nomenclature[%s]' % labelwidth
 794         document.body[i:j+1] = ['\\begin_inset ERT',
 795                                 'status collapsed',
 796                                 '',
 797                                 '\\begin_layout %s' % document.default_layout,
 798                                 '',
 799                                 '',
 800                                 '\\backslash',
 801                                 command,
 802                                 '\\end_layout',
 803                                 '',
 804                                 '\\end_inset']
 805         i = i + 11
 806     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 807         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 808         document.preamble.append('\\makenomenclature')
 809
 810
 811 def convert_esint(document):
 812     " Add \\use_esint setting to header. "
 813     i = find_token(document.header, "\\cite_engine", 0)
 814     if i == -1:
 815         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 816         return
 817     # 0 is off, 1 is auto, 2 is on.
 818     document.header.insert(i, '\\use_esint 0')
 819
 820
 821 def revert_esint(document):
 822     " Remove \\use_esint setting from header. "
 823     i = find_token(document.header, "\\use_esint", 0)
 824     if i == -1:
 825         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 826         return
 827     use_esint = document.header[i].split()[1]
 828     del document.header[i]
 829     # 0 is off, 1 is auto, 2 is on.
 830     if (use_esint == 2):
 831         document.preamble.append('\\usepackage{esint}')
 832
 833
 834 def revert_clearpage(document):
 835     " clearpage -> ERT "
 836     i = 0
 837     while 1:
 838         i = find_token(document.body, "\\clearpage", i)
 839         if i == -1:
 840             break
 841         document.body[i:i+1] =  ['\\begin_inset ERT',
 842                                 'status collapsed',
 843                                 '',
 844                                 '\\begin_layout %s' % document.default_layout,
 845                                 '',
 846                                 '',
 847                                 '\\backslash',
 848                                 'clearpage',
 849                                 '\\end_layout',
 850                                 '',
 851                                 '\\end_inset']
 852     i = i + 1
 853
 854
 855 def revert_cleardoublepage(document):
 856     " cleardoublepage -> ERT "
 857     i = 0
 858     while 1:
 859         i = find_token(document.body, "\\cleardoublepage", i)
 860         if i == -1:
 861             break
 862         document.body[i:i+1] =  ['\\begin_inset ERT',
 863                                 'status collapsed',
 864                                 '',
 865                                 '\\begin_layout %s' % document.default_layout,
 866                                 '',
 867                                 '',
 868                                 '\\backslash',
 869                                 'cleardoublepage',
 870                                 '\\end_layout',
 871                                 '',
 872                                 '\\end_inset']
 873     i = i + 1
 874
 875
 876 def convert_lyxline(document):
 877     " remove fontsize commands for \lyxline "
 878     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 879     # to change the line thickness. The new definiton does this so that imported
 880     # \lyxlines would have a different line thickness. The eventual fontsize command
 881     # before \lyxline is therefore removed to get the same output.
 882     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 883                  "large", "Large", "LARGE", "huge", "Huge"]
 884     for n in range(0, len(fontsizes)):
 885         i = 0
 886         k = 0
 887         while i < len(document.body):
 888             i = find_token(document.body, "\\size " + fontsizes[n], i)
 889             k = find_token(document.body, "\\lyxline", i)
 890             # the corresponding fontsize command is always 2 lines before the \lyxline
 891             if (i != -1 and k == i+2):
 892                 document.body[i:i+1] = []
 893             else:
 894                 break
 895         i = i + 1
 896
 897
 898 def revert_encodings(document):
 899     " Set new encodings to auto. "
 900     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 901                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 902                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 903     i = find_token(document.header, "\\inputencoding", 0)
 904     if i == -1:
 905         document.header.append("\\inputencoding auto")
 906     else:
 907         inputenc = get_value(document.header, "\\inputencoding", i)
 908         if inputenc in encodings:
 909             document.header[i] = "\\inputencoding auto"
 910     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 911
 912
 913 def convert_caption(document):
 914     " Convert caption layouts to caption insets. "
 915     i = 0
 916     while 1:
 917         i = find_token(document.body, "\\begin_layout Caption", i)
 918         if i == -1:
 919             return
 920         j = find_end_of_layout(document.body, i)
 921         if j == -1:
 922             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 923             return
 924
 925         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 926         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 927                             "\\begin_inset Caption", "",
 928                             "\\begin_layout %s" % document.default_layout]
 929         i = i + 1
 930
 931
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    # NOTE: the following string is a plain expression statement, it is not
    # part of the docstring above.
    " This assumes that the text class has a caption style. "
    # Index names used below (all refer to document.body):
    #   i             - the "\begin_inset Caption" line currently handled
    #   layout_before - the nearest "\begin_layout" line found by searching
    #                   backwards from i
    #   j             - first "\begin_layout" line at or after i (taken to be
    #                   the paragraph inside the inset)
    #   k             - the line reported as end of the caption inset
    #   layout_after  - first "\end_layout" line at or after k
    # The function returns None; it stops at the first structural problem
    # after issuing a warning.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the line that opened the surrounding paragraph: it is
        # needed again if that paragraph has to be restarted after the caption.
        layout_line = document.body[layout_before]
        # The inset is the first thing in its paragraph if only empty lines
        # separate it from the \begin_layout line.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Drop the \begin_layout line and the empty lines; the inset line
            # moves to index layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # Close the preceding paragraph; the inset line moves down by the
            # two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is the last thing in its paragraph if only empty lines
        # separate its end from the \end_layout line.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            # Reopen the surrounding paragraph with its original layout line.
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions are done from the back (k first, then j) so that the
        # smaller indices stay valid; a single empty line directly following
        # a deleted line is removed as well.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1004
1005
# Accents of InsetLaTeXAccent: the accent command character is mapped to
# the unicode combining character that produces the same accent.
accent_map = {
    '`': u'\u0300',   # grave
    "'": u'\u0301',   # acute
    '^': u'\u0302',   # circumflex
    '~': u'\u0303',   # tilde
    '=': u'\u0304',   # macron
    'u': u'\u0306',   # breve
    '.': u'\u0307',   # dot above
    '"': u'\u0308',   # diaeresis
    'r': u'\u030a',   # ring above
    'H': u'\u030b',   # double acute
    'v': u'\u030c',   # caron
    'b': u'\u0320',   # minus sign below
    'd': u'\u0323',   # dot below
    'c': u'\u0327',   # cedilla
    'k': u'\u0328',   # ogonek
    # The tie is special: It spans two characters, but only one is given
    # as argument, so we don't need to treat it differently.
    't': u'\u0361'    # tie
}


# Special accents of InsetLaTeXAccent that take no argument: the command
# itself stands for a complete character.
special_accent_map = {
    'i': u'\u0131',   # dotless i
    'j': u'\u0237',   # dotless j
    'l': u'\u0142',   # l with stroke
    'L': u'\u0141'    # L with stroke
}


# Special accent arguments of InsetLaTeXAccent: commands that may be used
# as the accented character.
accented_map = {
    '\\i': u'\u0131',   # dotless i
    '\\j': u'\u0237'    # dotless j
}
1043
1044
def _convert_accent(accent, accented_char):
    """ Return the unicode text for an accent command and its argument.

    accent is the accent command without the leading backslash and
    accented_char is its argument (may be empty). The result is the
    NFC-normalized character, or '' if the combination cannot be
    converted. """
    kind = accent
    base = accented_char
    if base == '':
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = ' '
    elif kind == 'q' and base in ('t', 'd', 'l', 'L'):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = 'v'
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # We can only convert accents on a single char
        return ''
    combining = accent_map.get(kind)
    if not combining:
        # unknown accent command
        return ''
    return unicodedata.normalize("NFC", "%s%s" % (base, combining))
1070
1071
def convert_ertbackslash(body, i, ert, default_layout):
    r""" Append the text ert to body[i] as valid ERT code.

    A backslash is written as '\backslash ' and the text goes on in a new
    empty line inserted behind it. A '\n' ends the current paragraph and
    starts a new one with the layout default_layout. All other characters
    are appended unchanged.

    body is modified in place. The return value is the (maybe incremented)
    index of the line that received the last character. """

    for char in ert:
        if char == '\n':
            new_paragraph = ['\\end_layout',
                             '',
                             '\\begin_layout %s' % default_layout,
                             '']
            body[i+1:i+1] = new_paragraph
            i += len(new_paragraph)
        elif char == '\\':
            body[i] += '\\backslash '
            i += 1
            body.insert(i, '')
        else:
            body[i] += char
    return i
1088
1089
def convert_accent(document):
    r""" Convert InsetLaTeXAccent to unicode characters.

    Every body line containing ``\i `` is split into the text in front of
    the inset and the accent command. If _convert_accent() can translate the
    accent, the line is replaced by the prefix followed by the resulting
    unicode text. Otherwise a warning is issued and the accent command is
    put into an ERT inset behind the prefix.

    The document body is modified in place; nothing is returned. """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a'  (also accepted)
    # '\i \"'    (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument has nested or unbalanced braces. We cannot
                # convert that, so keep it verbatim for the ERT. The leading
                # backslash is dropped here because it is added again below.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # i + 7 is the last of the lines inserted above: the ERT text is
        # appended there.
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
1139
1140
def revert_accent(document):
    r""" Convert accented unicode characters back to InsetLaTeXAccent.

    The work is done in four steps:
      1. Lines are joined where the next line starts with a combining
         accent, so that the accent and its base character are in one line.
      2. All lines are normalized to NFD, which splits accented characters
         into base character and combining accent.
      3. Every character of special_accent_map and every base character +
         combining accent pair that cannot be encoded in the current
         encoding is replaced by ``\i \<accent>{<char>}``. The rest of the
         line is moved to a new line behind it.
      4. All lines are normalized to NFC again.

    The document body is modified in place; nothing is returned. """
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # probably belong together (base character + combining accent),
            # so move the rest of the word to this line.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    i = 0
    while i < len(document.body):

        # NOTE(review): it looks odd that the language is only tracked if a
        # CJK encoding is set -- confirm that `!=' is really intended here.
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line.
            # These lines contain no text, so we go on with the next line.
            # i must be advanced before `continue', otherwise the same line
            # would be examined forever.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i = i + 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i = i + 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i = i + 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character: only the character at
                    # position j is replaced, the text in front of it stays.
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The line has changed, the rest is handled as next line
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters: the base character at
                    # position j-1 and the accent at position j are replaced,
                    # the text in front of them stays.
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    # The line has changed, the rest is handled as next line
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again.
    # New lines may have been inserted above, so the current number of lines
    # must be used here.
    for i in range(len(document.body)):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
1252
1253
def normalize_font_whitespace_259(document):
    """ Move whitespace at the border of a font change outside of it.

    Before format 259 a font change was ignored if a whitespace was the
    first or last character in the sequence. This handles the properties
    series, emph, color, shape, bar and family by calling
    normalize_font_whitespace() with their default values."""

    defaults = {}
    for font_property in ("\\series", "\\emph", "\\shape", "\\bar", "\\family"):
        defaults[font_property] = "default"
    # the color is the only property whose default value has another name
    defaults["\\color"] = "none"
    return normalize_font_whitespace(document, defaults)
1266
def normalize_font_whitespace_274(document):
    """ Move whitespace at the border of a language change outside of it.

    Before format 259 (sic) a font change was ignored if a whitespace was
    the first or last character in the sequence. Format 259 corrected this
    for most font properties, but the language was forgotten then. Here the
    same conversion (moving the whitespace outside) is applied to changes
    of the font language by calling normalize_font_whitespace()."""

    language_default = {}
    language_default["\\lang"] = "default"
    return normalize_font_whitespace(document, language_default)
1277
def get_paragraph_language(document, i):
    """ Return the language of the paragraph that contains line i of the
    document body. A \\lang command as first nonempty line of the paragraph
    determines the paragraph's language; without it the paragraph has the
    language of the document."""

    body = document.body

    layout_start = find_beginning_of_layout(body, i)
    first_line = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_line].split()

    if len(tokens) < 2 or tokens[0] != "\\lang":
        return document.language
    return tokens[1]
1295
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided     char_properties is handled by this function.

    document is the document to convert; its body is modified in place.
    Nothing is done if document.backend is not "latex".
    char_properties maps a font property token (e.g. "\\\\series") to the
    value that resets this property. If it has a "\\\\lang" entry, the value
    of that entry is overwritten in the passed dictionary for each paragraph.
    The function returns None."""

    if document.backend != "latex":
        return

    # lines is the body list itself (no copy): all changes below are changes
    # of the document body.
    lines = document.body

    # Properties that currently differ from their default value in the
    # paragraph being scanned: property token -> current value
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            # NOTE(review): for i == 0 lines[i-1] is the last line of the
            # body, and lines[i+1] / lines[i+2] below are not checked against
            # the end of the body -- presumably a font change never occurs in
            # the first or in the last lines; confirm.
            if lines[i-1] and lines[i-1][-1] == " ":
                # The space is removed from the end of the previous line and
                # becomes a line of its own, surrounded by lines that reset
                # and then restore the other changed properties.
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        # inserted directly after the space: restores the value
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        # inserted in front of the space: resets to the default
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                # skip the inserted lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                # The space is removed from the beginning of the next line and
                # is inserted in front of the font change, built in the same
                # way as in the branch above.
                lines[i+1] = lines[i+1][1:]
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                # skip the inserted lines
                i = i + len(added_lines)

        i = i + 1
1385
1386
def revert_utf8x(document):
    " Replace the utf8x input encoding by plain utf8. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8x":
            header[pos] = "\\inputencoding utf8"
    else:
        # No encoding line in the header at all: add one set to auto
        header.append("\\inputencoding auto")
    # Keep the cached attribute in sync with what the header now says
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1397
1398
def revert_utf8plain(document):
    " Replace the utf8-plain input encoding by utf8. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8-plain":
            header[pos] = "\\inputencoding utf8"
    else:
        # No encoding line in the header at all: add one set to auto
        header.append("\\inputencoding auto")
    # Re-read the header so the attribute reflects the final value
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1409
1410
def revert_beamer_alert(document):
    """ Revert beamer's \\alert inset back to ERT.

    Every "\\begin_inset CharStyle Alert" line is turned into an ERT inset
    header, and the line following the first "\\begin_layout" after it is
    wrapped in \\alert{...}.  If no such layout line (or no line after it)
    exists, a warning is issued and the conversion stops instead of running
    past the end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        # Advance to the first layout opened after the inset header
        i = i + 1
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            # Nothing left to wrap: the document is truncated or malformed
            document.warning("Malformed LyX document: Could not find layout of Alert inset.")
            return
        # Insert the \alert command around the first line of the layout
        body[i + 1] = "\\alert{" + body[i + 1] + '}'
        i = i + 1
1428
1429
def revert_beamer_structure(document):
    """ Revert beamer's \\structure inset back to ERT.

    Every "\\begin_inset CharStyle Structure" line is turned into an ERT
    inset header, and the line following the first "\\begin_layout" after it
    is wrapped in \\structure{...}.  If no such layout line (or no line
    after it) exists, a warning is issued and the conversion stops instead
    of running past the end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        # Advance to the first layout opened after the inset header
        i = i + 1
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            # Nothing left to wrap: the document is truncated or malformed
            document.warning("Malformed LyX document: Could not find layout of Structure inset.")
            return
        # Insert the \structure command around the first line of the layout
        body[i + 1] = "\\structure{" + body[i + 1] + '}'
        i = i + 1
1446
1447
def convert_changes(document):
    " Force output_changes to false when change tracking is disabled. "
    header = document.header

    track_pos = find_token(header, '\\tracking_changes', 0)
    if track_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return

    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return

    tracking = get_value(header, "\\tracking_changes", track_pos)
    output = get_value(header, "\\output_changes", output_pos)
    # Only the combination "not tracked but output" is rewritten
    if (tracking, output) == ("false", "true"):
        header[output_pos] = "\\output_changes false"
1462
1463
def revert_ascii(document):
    " Replace the ascii input encoding by auto. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "ascii":
            header[pos] = "\\inputencoding auto"
    else:
        # No encoding line in the header at all: add one set to auto
        header.append("\\inputencoding auto")
    # Re-read the header so the attribute reflects the final value
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1474
1475
def normalize_language_name(document):
    """ Rename the document languages brazil and portuges.

    "brazil" becomes "brazilian" and "portuges" becomes "portuguese", both
    in document.language and in the \\language header line.  Any other
    language is left untouched.
    """
    lang = {"brazil": "brazilian",
            "portuges": "portuguese"}

    if document.language not in lang:
        return
    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    # Without this guard a missing \language line (i == -1) would make the
    # assignment overwrite the last header line.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
1484
1485
def revert_language_name(document):
    """ Rename the document languages brazilian and portuguese back.

    "brazilian" becomes "brazil" and "portuguese" becomes "portuges", both
    in document.language and in the \\language header line.  Any other
    language is left untouched.
    """
    lang = {"brazilian": "brazil",
            "portuguese": "portuges"}

    if document.language not in lang:
        return
    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    # Without this guard a missing \language line (i == -1) would make the
    # assignment overwrite the last header line.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
1494
1495 #
1496 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " Rename the text class cv to simplecv. "
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1500
1501
def revert_cv_textclass(document):
    " Rename the text class simplecv back to cv. "
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1505
1506
1507 #
1508 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " Add the scaleBeforeRotation parameter to rotated and resized graphics. "
    start = 0
    while True:
        start = find_token(document.body, "\\begin_inset Graphics", start)
        if start == -1:
            return
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # The parameter is only needed when the inset has a rotation angle
        # together with a width, height or scale setting.
        angle_pos = find_token(document.body, "\trotateAngle", start + 1, end)
        size_pos = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], start + 1, end)
        if angle_pos != -1 and size_pos != -1:
            document.body.insert(end, 'scaleBeforeRotation')
        start += 1
1527
1528
1529 #
1530 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """ Remove the scaleBeforeRotation graphics parameter.

    For every graphics inset:
      * if it has a scaleBeforeRotation line, that line is deleted;
      * otherwise, if it has a rotateAngle together with a width, height or
        scale, the angle is moved into the "special" parameter (prepended
        as "angle=<value>") and the rotateAngle line is deleted.
    Insets that are rotated but not resized are left alone, and scanning
    continues with the next inset.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without an end there is no parameter range to work on
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
        else:
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
            special = get_value(document.body, 'special', i + 1, j)
            resized = -1
            if rotateAngle != "":
                resized = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
            # A rotated but unscaled graphic needs no change; do not abort
            # the whole pass, just go on to the next inset.
            if resized != -1:
                if special == "":
                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                else:
                    l = find_token(document.body, "\tspecial", i + 1, j)
                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                k = find_token(document.body, "\trotateAngle", i + 1, j)
                if k != -1:
                    del document.body[k]
        i = i + 1
1564
1565
1566
def convert_tableborder(document):
    """ Remove the "|" in front of ">{" from table cells with a left line.

    LyX used to double the left cell border because it ignored the "|"
    character in the cell arguments.  That is fixed, so the "|" has to be
    removed from every body line that contains both leftline="true" and
    "|>{".  Only the "|" itself is removed; the rest of the line, including
    its last character, is kept.
    """
    for i, line in enumerate(document.body):
        k = line.find("|>{")
        # the two tokens have to be in one line
        if k != -1 and "leftline=\"true\"" in line:
            # delete the "|"
            document.body[i] = line[:k] + line[k + 1:]
1579
1580
def revert_tableborder(document):
    ' Insert a "|" in front of ">{" in table cells that have a left line. '
    body = document.body
    for pos in range(len(body)):
        line = body[pos]
        marker = line.find(">{")
        # both tokens have to be on the same line
        if marker == -1 or line.find("leftline=\"true\"") == -1:
            continue
        # add the "|" in front of the first ">{"
        body[pos] = line[:marker] + '|' + line[marker:]
1591
1592
def revert_armenian(document):
    """ Revert Armenian support.

    The armscii8 input encoding is set to auto.  For documents in
    language armenian, \\usepackage{armtex} is added to the preamble and
    the language is set to english.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"

    # the preamble counts as existing when any of its lines holds a
    # backslash or a percent sign
    has_preamble = any("\\" in entry or "%" in entry
                       for entry in document.preamble)

    if document.language == "armenian":
        if has_preamble:
            # set the armtex entry as the first preamble line
            document.preamble[0:0] = ["\\usepackage{armtex}"]
        else:
            # create the preamble when it doesn't exist
            document.preamble.append('\\usepackage{armtex}')
        # Set document language from armenian to english
        document.language = "english"
        lang_pos = find_token(document.header, "\\language", 0)
        if lang_pos != -1:
            document.header[lang_pos] = "\\language english"
1623
1624
def revert_CJK(document):
    " Set CJK encodings to default and the chinese, japanese and korean languages to english. "
    cjk_encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")

    header = document.header
    enc_pos = find_token(header, "\\inputencoding", 0)
    if enc_pos != -1:
        if get_value(header, "\\inputencoding", enc_pos) in cjk_encodings:
            header[enc_pos] = "\\inputencoding default"
    else:
        # No encoding line in the header at all: add one set to auto
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language in cjk_languages:
        document.language = "english"
        lang_pos = find_token(header, "\\language", 0)
        if lang_pos != -1:
            header[lang_pos] = "\\language english"
1645
1646
def revert_preamble_listings_params(document):
    r""" Revert preamble option \listings_params

    The \listings_params header line is removed and its value is moved to
    the preamble as \usepackage{listings} followed by \lstset{<value>}.
    The value is everything after the token, with surrounding double
    quotes removed, so parameter strings containing blanks are kept whole.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Split off the token only; the rest of the line is the parameter string
    fields = document.header[i].split(None, 1)
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    else:
        params = ''
    document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1654
1655
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are passed on as caption={...} and
label={...} listings parameters.  Only a caption located inside the
listings inset, and only a label located inside that caption, are taken
into account.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # first three lines
        for line in range(i + 1, i + 4):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
            if document.body[line].startswith('lstparams'):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                # the listing content starts after the status line
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # Search inside this inset only; a caption belonging to a later
        # float must not be attributed to this listing.
        cap = find_token(document.body, '\\begin_inset Caption', i, j)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label? (only one placed inside the caption counts)
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # the caption text is everything in the caption inset outside
            # of the label inset that is not a LyX command line
            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            # the listing content starts after the caption
            k = cap_end + 1
        # looking for the oneline code for lstinline
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            # backslashes have to be written as \backslash lines in ERT
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout'
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1803
1804
def revert_include_listings(document):
    r''' Replace lstinputlisting Include insets by ERT, i.e. turn

\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

into

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

and make sure that the preamble loads the listings package.
    '''
    pattern = r'\\(lstinputlisting){([.\w]*)}(.*)'
    start = 0
    while True:
        start = find_token(document.body, r'\begin_inset Include \lstinputlisting', start)
        if start == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            # this should not happen
            break
        # the third word of the inset line is the command lstinputlisting{file}[options]
        found = re.match(pattern, document.body[start].split()[2])
        if found:
            command, filename, options = found.groups()
        else:
            command, filename, options = '', '', ''
        # backslashes have to be written as \backslash lines in ERT
        options = options.replace('\\', '\\backslash\n')
        ert = [r'\begin_inset ERT',
               'status open',
               '',
               r'\begin_layout %s' % document.default_layout,
               '',
               '',
               r'\backslash',
               '%s%s{%s}' % (command, options, filename),
               r'\end_layout',
               '',
               r'\end_inset']
        document.body[start : end + 1] = ert
1855
1856
def revert_ext_font_sizes(document):
    " Move the font sizes 10, 11 and 12 of ext* classes to the class options. "
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    class_option = size + 'pt'

    size_pos = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_pos] = '\\paperfontsize default'

    opt_pos = find_token(document.header, '\\options', 0)
    if opt_pos != -1:
        # extend the existing options line
        document.header[opt_pos] += ',%s' % class_option
    else:
        # no options yet: add a line right after \textclass
        opt_pos = find_token(document.header, '\\textclass', 0) + 1
        document.header[opt_pos:opt_pos] = ['\\options %s' % class_option]
1874
1875
def convert_ext_font_sizes(document):
    " Move the class options 10pt, 11pt and 12pt of ext* classes to the paper font size. "
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    if get_value(document.header, '\\paperfontsize', 0) != 'default':
        return

    opt_pos = find_token(document.header, '\\options', 0)
    if opt_pos == -1:
        return

    option_string = get_value(document.header, '\\options', opt_pos)

    known_sizes = ('10pt', '11pt', '12pt')
    # cheap test first: none of the sizes occurs anywhere in the options
    if not any(size in option_string for size in known_sizes):
        return

    option_list = option_string.split(',')
    hits = [n for n, opt in enumerate(option_list) if opt in known_sizes]
    if not hits:
        return
    # take the first matching option out of the list, dropping the "pt"
    fontsize = option_list.pop(hits[0])[:-2]

    size_pos = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_pos] = '\\paperfontsize %s' % fontsize

    if option_list:
        document.header[opt_pos] = '\\options %s' % ','.join(option_list)
    else:
        # the font size was the only option
        del document.header[opt_pos]
1911
1912
def revert_separator_layout(document):
    r'''Replace each --Separator-- layout by a default layout that starts
with a LyX note, i.e. turn

\begin_layout --Separator--
something
\end_layout

into

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

    '''

    start = 0
    while True:
        start = find_token(document.body, r'\begin_layout --Separator--', start)
        if start == -1:
            break
        end = find_end_of_layout(document.body, start + 1)
        if end == -1:
            # this should not happen
            break
        note = [r'\begin_layout %s' % document.default_layout,
                r'\begin_inset Note Note',
                'status open',
                '',
                r'\begin_layout %s' % document.default_layout,
                'Separate Environment',
                r'\end_layout',
                '',
                r'\end_inset']
        # whatever the separator layout contained is kept after the note
        content = document.body[start + 1 : end]
        document.body[start : end + 1] = note + content + ['', r'\end_layout']
1960
1961
def convert_arabic(document):
    """ Rename the language arabic to arabic_arabtex.

    The document language and its \\language header line are renamed, and
    every body line containing "\\lang arabic" is replaced by the line
    "\\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    for i, line in enumerate(document.body):
        if "\\lang arabic" in line:
            # change the language name
            document.body[i] = "\\lang arabic_arabtex"
1975
1976
def revert_arabic(document):
    """ Rename the language arabic_arabtex back to arabic.

    The document language and its \\language header line are renamed, and
    every body line containing "\\lang arabic_arabtex" is replaced by the
    line "\\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    for i, line in enumerate(document.body):
        if "\\lang arabic_arabtex" in line:
            # change the language name
            document.body[i] = "\\lang arabic"
1990
1991
1992 ##
1993 # Conversion hub
1994 #
1995
# Version strings associated with this module.
supported_versions = ["1.5.0","1.5"]
# Conversion table: each entry is [number, [functions]].  The numbers run
# upwards from 246 to 276; an empty list means no function is registered
# for that number.
# NOTE(review): the numbers are presumably LyX file format numbers and the
# table is presumably consumed by the lyx2lyx driver -- confirm there.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Reversion table: same entry layout as above, with the numbers running
# downwards from 275 to 245.  The three listings revert functions are
# registered twice, for 271 and for 268.
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


# Running this module as a script does nothing.
if __name__ == "__main__":
    pass