1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the "\\end_inset" that closes the inset
    containing lines[i] (delegates the matching to find_end_of)."""
    open_tok, close_tok = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, open_tok, close_tok)
def find_end_of_layout(lines, i):
    """Return the index of the "\\end_layout" that closes the layout
    containing lines[i] (delegates the matching to find_end_of)."""
    open_tok, close_tok = "\\begin_layout", "\\end_layout"
    return find_end_of(lines, i, open_tok, close_tok)
def find_beginning_of_layout(lines, i):
    """Return the index of the "\\begin_layout" that opens the layout
    containing lines[i] (delegates the matching to find_beginning_of)."""
    open_tok, close_tok = "\\begin_layout", "\\end_layout"
    return find_beginning_of(lines, i, open_tok, close_tok)
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
def revert_framed(document):
    """Revert framed/shaded notes to plain notes.

    Framed and Shaded note insets do not exist before format 249, so
    both are downgraded to a plain "\\begin_inset Note".
    """
    # NOTE(review): the loop skeleton (counter init, while, -1 guard,
    # increment) is missing from this listing and has been restored from
    # the standard lyx2lyx body-scan pattern — confirm against upstream.
    i = 0
    while True:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
# Mapping of LyX 1.4 '\fontscheme' names to the LyX 1.5 per-family font
# settings ('\font_roman', '\font_sans', '\font_typewriter').  A scheme
# that does not provide a family maps to 'default' for that family.
# NOTE(review): the 'pslatex' entries of roman_fonts and sans_fonts and
# the closing braces of the first two dicts are missing from this
# listing; they were restored from the visible typewriter_fonts entry
# ('pslatex' -> 'courier') and the standard pslatex font set
# (Times/Helvetica/Courier) — confirm against upstream.
roman_fonts = {'default' : 'default', 'ae'       : 'ae',
               'times'   : 'times',   'palatino' : 'palatino',
               'helvet'  : 'default', 'avant'    : 'default',
               'newcent' : 'newcent', 'bookman'  : 'bookman',
               'pslatex' : 'times'}
sans_fonts = {'default' : 'default', 'ae'       : 'default',
              'times'   : 'default', 'palatino' : 'default',
              'helvet'  : 'helvet',  'avant'    : 'avant',
              'newcent' : 'default', 'bookman'  : 'default',
              'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}
85 def convert_font_settings(document):
86 " Convert font settings. "
88 i = find_token_exact(document.header, "\\fontscheme", i)
90 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
92 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
94 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
95 font_scheme = 'default'
96 if not font_scheme in roman_fonts.keys():
97 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
98 font_scheme = 'default'
99 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
100 '\\font_sans %s' % sans_fonts[font_scheme],
101 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
102 '\\font_default_family default',
105 '\\font_sf_scale 100',
106 '\\font_tt_scale 100']
109 def revert_font_settings(document):
110 " Revert font settings. "
113 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
114 for family in 'roman', 'sans', 'typewriter':
115 name = '\\font_%s' % family
116 i = find_token_exact(document.header, name, i)
118 document.warning("Malformed LyX document: Missing `%s'." % name)
121 if (insert_line < 0):
123 fonts[family] = get_value(document.header, name, i, i + 1)
124 del document.header[i]
125 i = find_token_exact(document.header, '\\font_default_family', i)
127 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
128 font_default_family = 'default'
130 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
131 del document.header[i]
132 i = find_token_exact(document.header, '\\font_sc', i)
134 document.warning("Malformed LyX document: Missing `\\font_sc'.")
137 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
138 del document.header[i]
139 if font_sc != 'false':
140 document.warning("Conversion of '\\font_sc' not yet implemented.")
141 i = find_token_exact(document.header, '\\font_osf', i)
143 document.warning("Malformed LyX document: Missing `\\font_osf'.")
146 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
147 del document.header[i]
148 i = find_token_exact(document.header, '\\font_sf_scale', i)
150 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
151 font_sf_scale = '100'
153 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
154 del document.header[i]
155 if font_sf_scale != '100':
156 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
157 i = find_token_exact(document.header, '\\font_tt_scale', i)
159 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
160 font_tt_scale = '100'
162 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
163 del document.header[i]
164 if font_tt_scale != '100':
165 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
166 for font_scheme in roman_fonts.keys():
167 if (roman_fonts[font_scheme] == fonts['roman'] and
168 sans_fonts[font_scheme] == fonts['sans'] and
169 typewriter_fonts[font_scheme] == fonts['typewriter']):
170 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
171 if font_default_family != 'default':
172 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
173 if font_osf == 'true':
174 document.warning("Ignoring `\\font_osf = true'")
176 font_scheme = 'default'
177 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
178 if fonts['roman'] == 'cmr':
179 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
180 if font_osf == 'true':
181 document.preamble.append('\\usepackage{eco}')
183 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
184 if fonts['roman'] == font:
185 document.preamble.append('\\usepackage{%s}' % font)
186 for font in 'cmss', 'lmss', 'cmbr':
187 if fonts['sans'] == font:
188 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
189 for font in 'berasans':
190 if fonts['sans'] == font:
191 document.preamble.append('\\usepackage{%s}' % font)
192 for font in 'cmtt', 'lmtt', 'cmtl':
193 if fonts['typewriter'] == font:
194 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
195 for font in 'courier', 'beramono', 'luximono':
196 if fonts['typewriter'] == font:
197 document.preamble.append('\\usepackage{%s}' % font)
198 if font_default_family != 'default':
199 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
200 if font_osf == 'true':
201 document.warning("Ignoring `\\font_osf = true'")
204 def revert_booktabs(document):
205 " We remove the booktabs flag or everything else will become a mess. "
206 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
207 re_tspace = re.compile(r'\s+topspace="[^"]+"')
208 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
209 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
212 i = find_token(document.body, "\\begin_inset Tabular", i)
215 j = find_end_of_inset(document.body, i + 1)
217 document.warning("Malformed LyX document: Could not find end of tabular.")
219 for k in range(i, j):
220 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
221 document.warning("Converting 'booktabs' table to normal table.")
222 document.body[k] = document.body[k].replace(' booktabs="true"', '')
223 if re.search(re_row, document.body[k]):
224 document.warning("Removing extra row space.")
225 document.body[k] = re_tspace.sub('', document.body[k])
226 document.body[k] = re_bspace.sub('', document.body[k])
227 document.body[k] = re_ispace.sub('', document.body[k])
231 def convert_multiencoding(document, forward):
232 """ Fix files with multiple encodings.
233 Files with an inputencoding of "auto" or "default" and multiple languages
234 where at least two languages have different default encodings are encoded
235 in multiple encodings for file formats < 249. These files are incorrectly
236 read and written (as if the whole file was in the encoding of the main
238 This is not true for files written by CJK-LyX, they are always in the locale
242 - converts from fake unicode values to true unicode if forward is true, and
243 - converts from true unicode values to fake unicode if forward is false.
244 document.encoding must be set to the old value (format 248) in both cases.
246 We do this here and not in LyX.py because it is far easier to do the
247 necessary parsing in modern formats than in ancient ones.
249 inset_types = ["Foot", "Note"]
250 if document.cjk_encoding != '':
252 encoding_stack = [document.encoding]
254 lang_re = re.compile(r"^\\lang\s(\S+)")
255 inset_re = re.compile(r"^\\begin_inset\s(\S+)")
256 if document.inputencoding == "auto" or document.inputencoding == "default":
257 for i in range(len(document.body)):
258 result = lang_re.match(document.body[i])
260 language = result.group(1)
261 if language == "default":
262 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
263 encoding_stack[-1] = document.encoding
265 from lyx2lyx_lang import lang
266 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
267 encoding_stack[-1] = lang[language][3]
268 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
269 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
270 if len(inset_stack) > 0 and inset_stack[-1] in inset_types:
271 from lyx2lyx_lang import lang
272 encoding_stack.append(lang[document.language][3])
274 encoding_stack.append(encoding_stack[-1])
275 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
276 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
277 if len(encoding_stack) == 1:
278 # Don't remove the document encoding from the stack
279 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
281 del encoding_stack[-1]
282 elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
283 inset_result = inset_re.match(document.body[i])
285 inset_type = inset_result.group(1)
286 inset_stack.append(inset_type)
288 inset_stack.append("")
289 elif find_token(document.body, "\\end_inset", i, i + 1) == i:
291 if encoding_stack[-1] != document.encoding:
293 # This line has been incorrectly interpreted as if it was
294 # encoded in 'encoding'.
295 # Convert back to the 8bit string that was in the file.
296 orig = document.body[i].encode(document.encoding)
297 # Convert the 8bit string that was in the file to unicode
298 # with the correct encoding.
299 document.body[i] = orig.decode(encoding_stack[-1])
301 # Convert unicode to the 8bit string that will be written
302 # to the file with the correct encoding.
303 orig = document.body[i].encode(encoding_stack[-1])
304 # Convert the 8bit string that will be written to the
305 # file to fake unicode with the encoding that will later
306 # be used when writing to the file.
307 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to UTF-8.

    First re-encodes the body from the old per-language fake-unicode
    values via convert_multiencoding(), then records the new encoding.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
316 def revert_utf8(document):
317 " Set document encoding to the value corresponding to inputencoding. "
318 i = find_token(document.header, "\\inputencoding", 0)
320 document.header.append("\\inputencoding auto")
321 elif get_value(document.header, "\\inputencoding", i) == "utf8":
322 document.header[i] = "\\inputencoding auto"
323 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
324 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
325 convert_multiencoding(document, False)
328 def read_unicodesymbols():
329 " Read the unicodesymbols list of unicode characters and corresponding commands."
330 pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
331 fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
333 for line in fp.readlines():
335 line=line.replace(' "',' ') # remove all quotation marks with spaces before
336 line=line.replace('" ',' ') # remove all quotation marks with spaces after
337 line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
339 # flag1 and flag2 are preamble and other flags
340 [ucs4,command,flag1,flag2] =line.split(None,3)
341 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
349 def revert_unicode(document):
350 '''Transform unicode characters that can not be written using the
351 document encoding to commands according to the unicodesymbols
352 file. Characters that can not be replaced by commands are replaced by
353 an replacement string. Flags other than 'combined' are currently not
356 replacement_character = '???'
357 spec_chars = read_unicodesymbols()
359 # Define strings to start and end ERT and math insets
360 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
361 ert_outro='\n\\end_layout\n\n\\end_inset\n'
362 math_intro='\n\\begin_inset Formula $'
363 math_outro='$\n\\end_inset'
364 # Find unicode characters and replace them
365 in_ert = False # flag set to 1 if in ERT inset
366 in_math = False # flag set to 1 if in math inset
367 insets = [] # list of active insets
369 # Go through the file to capture all combining characters
370 last_char = '' # to store the previous character
373 while i < len(document.body):
374 line = document.body[i]
376 if line.find('\\begin_inset') > -1:
377 # check which inset to start
378 if line.find('\\begin_inset ERT') > -1:
381 elif line.find('\\begin_inset Formula') > -1:
383 insets.append('math')
385 insets.append('other')
386 if line.find('\\end_inset') > -1:
387 # check which inset to end
389 cur_inset = insets.pop()
390 if cur_inset == 'ert':
392 elif cur_inset == 'math':
395 pass # end of other inset
397 pass # inset list was empty (for some reason)
399 # Try to write the line
401 # If all goes well the line is written here
402 dummy = line.encode(document.encoding)
406 # Error, some character(s) in the line need to be replaced
408 for character in line:
410 # Try to write the character
411 dummy = character.encode(document.encoding)
412 mod_line += character
413 last_char = character
415 # Try to replace with ERT/math inset
416 if spec_chars.has_key(character):
417 command = spec_chars[character][0] # the command to replace unicode
418 flag1 = spec_chars[character][1]
419 flag2 = spec_chars[character][2]
420 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
421 # We have a character that should be combined with the previous
422 command += '{' + last_char + '}'
423 # Remove the last character. Ignore if it is whitespace
424 if len(last_char.rstrip()):
425 # last_char was found and is not whitespace
427 mod_line = mod_line[:-1]
428 else: # last_char belongs to the last line
429 document.body[i-1] = document.body[i-1][:-1]
431 # The last character was replaced by a command. For now it is
432 # ignored. This could be handled better.
434 if command[0:2] == '\\\\':
435 if command[2:12]=='ensuremath':
438 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
439 command = command.replace('}', '$\n')
441 # add a math inset with the replacement character
442 command = command.replace('\\\\ensuremath{\\', math_intro)
443 command = command.replace('}', math_outro)
445 # we are already in a math inset
446 command = command.replace('\\\\ensuremath{\\', '')
447 command = command.replace('}', '')
450 # avoid putting an ERT in a math; instead put command as text
451 command = command.replace('\\\\', '\mathrm{')
452 command = command + '}'
454 # add an ERT inset with the replacement character
455 command = command.replace('\\\\', ert_intro)
456 command = command + ert_outro
458 command = command.replace('\\\\', '\n\\backslash\n')
459 last_char = '' # indicate that the character should not be removed
462 # Replace with replacement string
463 mod_line += replacement_character
464 document.body[i:i+1] = mod_line.split('\n')
465 i += len(mod_line.split('\n'))
468 def revert_cs_label(document):
469 " Remove status flag of charstyle label. "
472 i = find_token(document.body, "\\begin_inset CharStyle", i)
475 # Seach for a line starting 'show_label'
476 # If it is not there, break with a warning message
479 if (document.body[i][:10] == "show_label"):
482 elif (document.body[i][:13] == "\\begin_layout"):
483 document.warning("Malformed LyX document: Missing 'show_label'.")
490 def convert_bibitem(document):
492 \bibitem [option]{argument}
496 \begin_inset LatexCommand bibitem
502 This must be called after convert_commandparams.
506 i = find_token(document.body, "\\bibitem", i)
509 j = document.body[i].find('[') + 1
510 k = document.body[i].rfind(']')
511 if j == 0: # No optional argument found
514 option = document.body[i][j:k]
515 j = document.body[i].rfind('{') + 1
516 k = document.body[i].rfind('}')
517 argument = document.body[i][j:k]
518 lines = ['\\begin_inset LatexCommand bibitem']
520 lines.append('label "%s"' % option.replace('"', '\\"'))
521 lines.append('key "%s"' % argument.replace('"', '\\"'))
523 lines.append('\\end_inset')
524 document.body[i:i+1] = lines
# command : [option1, option2, argument]
# Parameter names used when converting "\begin_inset LatexCommand"
# insets.  All natbib/jurabib-style citation commands share the same
# triple, so they are generated in bulk instead of being spelled out.
_cite_like_commands = [
    "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
    "citeyear", "citeyearpar",
    "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
    "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
    "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
    "citefield", "citetitle", "cite*"]
commandparams_info = dict([(cmd, ["after", "before", "key"])
                           for cmd in _cite_like_commands])
commandparams_info.update({
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "eqref"           : ["name", "", "reference"],
    "pageref"         : ["name", "", "reference"],
    "prettyref"       : ["name", "", "reference"],
    "ref"             : ["name", "", "reference"],
    "vpageref"        : ["name", "", "reference"],
    "vref"            : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"]})
573 def convert_commandparams(document):
576 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
581 \begin_inset LatexCommand cmdname
587 name1, name2 and name3 can be different for each command.
589 # \begin_inset LatexCommand bibitem was not the official version (see
590 # convert_bibitem()), but could be read in, so we convert it here, too.
594 i = find_token(document.body, "\\begin_inset LatexCommand", i)
597 command = document.body[i][26:].strip()
599 document.warning("Malformed LyX document: Missing LatexCommand name.")
603 j = find_token(document.body, "\\end_inset", i + 1)
605 document.warning("Malformed document")
607 command += "".join(document.body[i+1:j])
608 document.body[i+1:j] = []
610 # The following parser is taken from the original InsetCommandParams::scanCommand
616 # Used to handle things like \command[foo[bar]]{foo{bar}}
620 if ((state == "CMDNAME" and c == ' ') or
621 (state == "CMDNAME" and c == '[') or
622 (state == "CMDNAME" and c == '{')):
624 if ((state == "OPTION" and c == ']') or
625 (state == "SECOPTION" and c == ']') or
626 (state == "CONTENT" and c == '}')):
630 nestdepth = nestdepth - 1
631 if ((state == "OPTION" and c == '[') or
632 (state == "SECOPTION" and c == '[') or
633 (state == "CONTENT" and c == '{')):
634 nestdepth = nestdepth + 1
635 if state == "CMDNAME":
637 elif state == "OPTION":
639 elif state == "SECOPTION":
641 elif state == "CONTENT":
646 elif c == '[' and b != ']':
648 nestdepth = 0 # Just to be sure
649 elif c == '[' and b == ']':
651 nestdepth = 0 # Just to be sure
654 nestdepth = 0 # Just to be sure
657 # Now we have parsed the command, output the parameters
658 lines = ["\\begin_inset LatexCommand %s" % name]
660 if commandparams_info[name][0] == "":
661 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
663 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
665 if commandparams_info[name][1] == "":
666 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
668 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
670 if commandparams_info[name][2] == "":
671 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
673 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
674 document.body[i:i+1] = lines
678 def revert_commandparams(document):
679 regex = re.compile(r'(\S+)\s+(.+)')
682 i = find_token(document.body, "\\begin_inset LatexCommand", i)
685 name = document.body[i].split()[2]
686 j = find_end_of_inset(document.body, i + 1)
691 for k in range(i + 1, j):
692 match = re.match(regex, document.body[k])
694 pname = match.group(1)
695 pvalue = match.group(2)
696 if pname == "preview":
697 preview_line = document.body[k]
698 elif (commandparams_info[name][0] != "" and
699 pname == commandparams_info[name][0]):
700 option1 = pvalue.strip('"').replace('\\"', '"')
701 elif (commandparams_info[name][1] != "" and
702 pname == commandparams_info[name][1]):
703 option2 = pvalue.strip('"').replace('\\"', '"')
704 elif (commandparams_info[name][2] != "" and
705 pname == commandparams_info[name][2]):
706 argument = pvalue.strip('"').replace('\\"', '"')
707 elif document.body[k].strip() != "":
708 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
709 if name == "bibitem":
711 lines = ["\\bibitem {%s}" % argument]
713 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
717 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
719 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
722 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
724 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
725 if name != "bibitem":
726 if preview_line != "":
727 lines.append(preview_line)
729 lines.append('\\end_inset')
730 document.body[i:j+1] = lines
734 def revert_nomenclature(document):
735 " Convert nomenclature entry to ERT. "
736 regex = re.compile(r'(\S+)\s+(.+)')
740 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
744 j = find_end_of_inset(document.body, i + 1)
749 for k in range(i + 1, j):
750 match = re.match(regex, document.body[k])
752 name = match.group(1)
753 value = match.group(2)
754 if name == "preview":
755 preview_line = document.body[k]
756 elif name == "symbol":
757 symbol = value.strip('"').replace('\\"', '"')
758 elif name == "description":
759 description = value.strip('"').replace('\\"', '"')
760 elif name == "prefix":
761 prefix = value.strip('"').replace('\\"', '"')
762 elif document.body[k].strip() != "":
763 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
765 command = 'nomenclature{%s}{%s}' % (symbol, description)
767 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
768 document.body[i:j+1] = ['\\begin_inset ERT',
771 '\\begin_layout %s' % document.default_layout,
780 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
781 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
782 document.preamble.append('\\makenomenclature')
785 def revert_printnomenclature(document):
786 " Convert printnomenclature to ERT. "
787 regex = re.compile(r'(\S+)\s+(.+)')
791 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
795 j = find_end_of_inset(document.body, i + 1)
798 for k in range(i + 1, j):
799 match = re.match(regex, document.body[k])
801 name = match.group(1)
802 value = match.group(2)
803 if name == "preview":
804 preview_line = document.body[k]
805 elif name == "labelwidth":
806 labelwidth = value.strip('"').replace('\\"', '"')
807 elif document.body[k].strip() != "":
808 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
810 command = 'nomenclature{}'
812 command = 'nomenclature[%s]' % labelwidth
813 document.body[i:j+1] = ['\\begin_inset ERT',
816 '\\begin_layout %s' % document.default_layout,
825 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
826 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
827 document.preamble.append('\\makenomenclature')
def convert_esint(document):
    """Add the '\\use_esint' setting to the document header.

    The new flag is inserted just before '\\cite_engine' with value 0
    (esint off), which reproduces the pre-format behaviour.
    """
    # NOTE(review): the `if i == -1:` guard and its `return` are missing
    # from this listing; restored from the pattern used by the sibling
    # header converters — confirm against upstream.
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
840 def revert_esint(document):
841 " Remove \\use_esint setting from header. "
842 i = find_token(document.header, "\\use_esint", 0)
844 document.warning("Malformed LyX document: Missing `\\use_esint'.")
846 use_esint = document.header[i].split()[1]
847 del document.header[i]
848 # 0 is off, 1 is auto, 2 is on.
850 document.preamble.append('\\usepackage{esint}')
853 def revert_clearpage(document):
857 i = find_token(document.body, "\\clearpage", i)
860 document.body[i:i+1] = ['\\begin_inset ERT',
863 '\\begin_layout %s' % document.default_layout,
874 def revert_cleardoublepage(document):
875 " cleardoublepage -> ERT "
878 i = find_token(document.body, "\\cleardoublepage", i)
881 document.body[i:i+1] = ['\\begin_inset ERT',
884 '\\begin_layout %s' % document.default_layout,
895 def convert_lyxline(document):
896 " remove fontsize commands for \lyxline "
897 # The problematic is: The old \lyxline definition doesn't handle the fontsize
898 # to change the line thickness. The new definiton does this so that imported
899 # \lyxlines would have a different line thickness. The eventual fontsize command
900 # before \lyxline is therefore removed to get the same output.
901 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
902 "large", "Large", "LARGE", "huge", "Huge"]
903 for n in range(0, len(fontsizes)):
906 while i < len(document.body):
907 i = find_token(document.body, "\\size " + fontsizes[n], i)
908 k = find_token(document.body, "\\lyxline", i)
909 # the corresponding fontsize command is always 2 lines before the \lyxline
910 if (i != -1 and k == i+2):
911 document.body[i:i+1] = []
917 def revert_encodings(document):
918 " Set new encodings to auto. "
919 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
920 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
921 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
922 i = find_token(document.header, "\\inputencoding", 0)
924 document.header.append("\\inputencoding auto")
926 inputenc = get_value(document.header, "\\inputencoding", i)
927 if inputenc in encodings:
928 document.header[i] = "\\inputencoding auto"
929 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
932 def convert_caption(document):
933 " Convert caption layouts to caption insets. "
936 i = find_token(document.body, "\\begin_layout Caption", i)
939 j = find_end_of_layout(document.body, i)
941 document.warning("Malformed LyX document: Missing `\\end_layout'.")
944 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
945 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
946 "\\begin_inset Caption", "",
947 "\\begin_layout %s" % document.default_layout]
951 def revert_caption(document):
952 " Convert caption insets to caption layouts. "
953 " This assumes that the text class has a caption style. "
956 i = find_token(document.body, "\\begin_inset Caption", i)
960 # We either need to delete the previous \begin_layout line, or we
961 # need to end the previous layout if this inset is not in the first
962 # position of the paragraph.
963 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
964 if layout_before == -1:
965 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
967 layout_line = document.body[layout_before]
968 del_layout_before = True
969 l = layout_before + 1
971 if document.body[l] != "":
972 del_layout_before = False
975 if del_layout_before:
976 del document.body[layout_before:i]
979 document.body[i:i] = ["\\end_layout", ""]
982 # Find start of layout in the inset and end of inset
983 j = find_token(document.body, "\\begin_layout", i)
985 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
987 k = find_end_of_inset(document.body, i)
989 document.warning("Malformed LyX document: Missing `\\end_inset'.")
992 # We either need to delete the following \end_layout line, or we need
993 # to restart the old layout if this inset is not at the paragraph end.
994 layout_after = find_token(document.body, "\\end_layout", k)
995 if layout_after == -1:
996 document.warning("Malformed LyX document: Missing `\\end_layout'.")
998 del_layout_after = True
1000 while l < layout_after:
1001 if document.body[l] != "":
1002 del_layout_after = False
1005 if del_layout_after:
1006 del document.body[k+1:layout_after+1]
1008 document.body[k+1:k+1] = [layout_line, ""]
1010 # delete \begin_layout and \end_inset and replace \begin_inset with
1011 # "\begin_layout Caption". This works because we can only have one
1012 # paragraph in the caption inset: The old \end_layout will be recycled.
1013 del document.body[k]
1014 if document.body[k] == "":
1015 del document.body[k]
1016 del document.body[j]
1017 if document.body[j] == "":
1018 del document.body[j]
1019 document.body[i] = "\\begin_layout Caption"
1020 if document.body[i+1] == "":
1021 del document.body[i+1]
# Accents of InsetLaTeXAccent: maps the old inset's one-character accent
# command to the corresponding Unicode combining character (applied to
# the preceding base character).
# NOTE(review): the `accent_map = {` assignment line and the closing
# brace are missing from this listing and have been restored.
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361'  # tie. This is special: It spans two characters, but
                     # only one is given as argument, so we don't need to
                     # treat it differently.
}
# special accents of InsetLaTeXAccent without argument: commands that
# stand for a complete character on their own.
# NOTE(review): the closing brace is missing from this listing and has
# been restored.
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141'  # L with stroke
}
1057 # special accent arguments of InsetLaTeXAccent
1059 '\\i' : u'\u0131', # dotless i
1060 '\\j' : u'\u0237' # dotless j
def _convert_accent(accent, accented_char):
    # Return the precomposed Unicode character for an accent inset, an entry
    # of special_accent_map for argument-less accents, or compose base char +
    # combining mark via NFC normalization.
    # NOTE(review): several lines are elided in this excerpt (e.g. the
    # `type = accent` binding that the branches below rely on, the
    # `if char == '':` guard and the early `return ''` failure branches);
    # indentation follows the visible text.
    char = accented_char
    if type in special_accent_map:
        return special_accent_map[type]
    # a missing char is treated as space by LyX
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
    a = accent_map.get(type)
    return unicodedata.normalize("NFC", "%s%s" % (char, a))
def convert_ertbackslash(body, i, ert, default_layout):
    r""" -------------------------------------------------------------------------------------------
    Convert backslashes and '\n' into valid ERT code, append the converted
    text to body[i] and return the (maybe incremented) line index i"""
    # NOTE(review): the `for c in ert:` loop and its `if`/`elif`/`else`
    # dispatch (plus the final `return i`) appear elided in this excerpt;
    # only the three per-character actions survive below.
    # Backslash -> LyX ERT token (the trailing space is significant).
    body[i] = body[i] + '\\backslash '
    # Newline -> close the current layout and open a fresh one.
    body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
    # Any other character is copied verbatim.
    body[i] = body[i] + c
1109 def convert_accent(document):
1110 # The following forms are supported by LyX:
1111 # '\i \"{a}' (standard form, as written by LyX)
1112 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1113 # '\i \"{ }' (also accepted if the accented char is a space)
1114 # '\i \" a' (also accepted)
1115 # '\i \"' (also accepted)
1116 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
1117 re_contents = re.compile(r'^([^\s{]+)(.*)$')
1118 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
1121 i = find_re(document.body, re_wholeinset, i)
1124 match = re_wholeinset.match(document.body[i])
1125 prefix = match.group(1)
1126 contents = match.group(3).strip()
1127 match = re_contents.match(contents)
1129 # Strip first char (always \)
1130 accent = match.group(1)[1:]
1131 accented_contents = match.group(2).strip()
1132 match = re_accentedcontents.match(accented_contents)
1133 accented_char = match.group(1)
1134 converted = _convert_accent(accent, accented_char)
1136 # Normalize contents
1137 contents = '%s{%s}' % (accent, accented_char),
1139 document.body[i] = '%s%s' % (prefix, converted)
1142 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1143 document.body[i] = prefix
1144 document.body[i+1:i+1] = ['\\begin_inset ERT',
1147 '\\begin_layout %s' % document.default_layout,
1151 i = convert_ertbackslash(document.body, i + 7,
1153 document.default_layout)
1154 document.body[i+1:i+1] = ['\\end_layout',
def revert_accent(document):
    # Replace precomposed unicode characters that the target encoding cannot
    # represent with InsetLaTeXAccent commands (inverse of convert_accent).
    # Build the inverses of the three accent tables first.
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # NOTE(review): this excerpt elides several control-flow lines
    # (`continue`, `try:`/`except TypeError:`, `if result:`, `else:`,
    # `if j > 0:` guards and loop index initialisation/increments);
    # indentation below follows the visible text.

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding. The stack mirrors nested layouts; \lang switches update it.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            language = result.group(1)
            if language == "default":
                encoding_stack[-1] = document.encoding
                from lyx2lyx_lang import lang
                encoding_stack[-1] = lang[language][3]
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            encoding_stack.append(encoding_stack[-1])
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            del encoding_stack[-1]

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    # NOTE(review): slice bound `[:j-1]` would also drop the
                    # character before position j; deleting only the char at j
                    # suggests `[:j]` — verify against the full file/history.
                        document.body[i] = document.body[i][:j-1]
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    # NOTE(review): same concern as above for `[:j-2]` vs `[:j-1]`.
                        document.body[i] = document.body[i][:j-2]
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(numberoflines):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Font properties to normalize, each mapped to its "off"/reset value.
    # NOTE(review): some entries appear elided in this excerpt (upstream also
    # lists e.g. "\\bar" and "\\noun") — confirm against the full file.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\shape": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """ Before format 259 (sic) the font changes were ignored if a
    whitespace was the first or last character in the sequence. This was
    corrected for most font properties in format 259, but the language
    was forgotten then. This function applies the same conversion done
    there (namely, transfers the whitespace outside) for font language
    changes, as well."""
    # Only the \lang property needs fixing at format 274.
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    # First non-empty line after the opening \begin_layout of this paragraph.
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        # NOTE(review): the body of this branch (presumably `return words[1]`)
        # appears elided in this excerpt — confirm against the full file.
    return document.language
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""

    # NOTE(review): several lines are elided in this excerpt (the early
    # return for non-latex backends, `changes = {}` / `i = 0` initialisation,
    # some `if`/`else` guards, `break`/`continue` statements and the loop
    # increment); indentation below follows the visible text.
    if document.backend != "latex":

    lines = document.body

    # `changes` tracks the currently-open (non-default) font properties.
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
                # property gets set (NOTE(review): `else:` guard elided here)
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Property is reset in lines[i], so add the new stuff afterwards
                lines[i+1:i+1] = added_lines
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                lines[i+1] = lines[i+1][1:]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    # NOTE(review): the `if i == -1:` / `else:` guards around the two branches
    # below appear elided in this excerpt (the append is the missing-token
    # branch); indentation follows the visible text.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "utf8x":
        document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    # NOTE(review): the `if i == -1:` / `else:` guards appear elided in this
    # excerpt; indentation follows the visible text.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "utf8-plain":
        document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    " Revert beamer's \\alert inset back to ERT. "
    # NOTE(review): the loop scaffolding (index initialisation, the
    # `if i == -1: return` guard, the walk to the inset's layout line and
    # the index increment) is elided in this excerpt.
    i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
    # Turn the CharStyle inset into a plain ERT inset.
    document.body[i] = "\\begin_inset ERT"
    if (document.body[i][:13] == "\\begin_layout"):
        # Insert the \alert command
        document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
def revert_beamer_structure(document):
    " Revert beamer's \\structure inset back to ERT. "
    # NOTE(review): loop scaffolding elided in this excerpt, as in
    # revert_beamer_alert above.
    i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
    document.body[i] = "\\begin_inset ERT"
    if (document.body[i][:13] == "\\begin_layout"):
        # Wrap the inset's text in the \structure command.
        document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    # NOTE(review): the `if i == -1:` / `return` guards after each warning
    # appear elided in this excerpt.
    i = find_token(document.header, '\\tracking_changes', 0)
    document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
    j = find_token(document.header, '\\output_changes', 0)
    document.warning("Malformed lyx document: Missing '\\output_changes'.")
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # Outputting changes without tracking them is inconsistent; disable it.
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    # NOTE(review): the `if i == -1:` / `else:` guards appear elided in this
    # excerpt; indentation follows the visible text.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "ascii":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    "Rename obsolete babel language names to their modern LyX names."
    # Old name -> new name.
    lang = { "brazil": "brazilian",
            "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # FIX: guard against a missing \language line; previously i == -1
        # would silently clobber the last header line.
        if i == -1:
            document.warning("Malformed LyX document: Missing `\\language'.")
            return
        document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    "Rename modern language names back to their old babel names."
    # New name -> old name (inverse of normalize_language_name).
    lang = { "brazilian": "brazil",
            "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # FIX: guard against a missing \language line; previously i == -1
        # would silently clobber the last header line.
        if i == -1:
            document.warning("Malformed LyX document: Missing `\\language'.")
            return
        document.header[i] = "\\language %s" % document.language
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    "Rename the obsolete `cv` text class to its successor `simplecv`."
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
def revert_cv_textclass(document):
    "Rename the `simplecv` text class back to its old name `cv`."
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    # NOTE(review): loop scaffolding (index initialisation, the
    # `if i == -1: return` / `if j == -1:` guards and the index increment)
    # is elided in this excerpt.
    i = find_token(document.body, "\\begin_inset Graphics", i)
    j = find_end_of_inset(document.body, i+1)
    document.warning("Malformed LyX document: Could not find end of graphics inset.")
    # Search for rotateAngle and width or height or scale
    # If these params are not there, nothing needs to be done.
    # (graphics parameter lines start with a literal tab character)
    k = find_token(document.body, "\trotateAngle", i + 1, j)
    l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
    if (k != -1 and l != -1):
        document.body.insert(j, 'scaleBeforeRotation')
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    " remove scaleBeforeRotation graphics parameter. "
    # NOTE(review): loop scaffolding and several `if`/`else` guards are
    # elided in this excerpt; indentation follows the visible text.
    i = find_token(document.body, "\\begin_inset Graphics", i)
    j = find_end_of_inset(document.body, i + 1)
    document.warning("Malformed LyX document: Could not find end of graphics inset.")
    # If there's a scaleBeforeRotation param, just remove that
    k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
    del document.body[k]
    # if not, and if we have rotateAngle and width or height or scale,
    # we have to put the rotateAngle value to special
    rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
    special = get_value(document.body, 'special', i + 1, j)
    if rotateAngle != "":
        k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        # No existing special param: insert a fresh one with the angle...
        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
        # ...otherwise prepend the angle to the existing special value.
        l = find_token(document.body, "\tspecial", i + 1, j)
        document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        del document.body[k]
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    # NOTE(review): the loop index initialisation and increment are elided in
    # this excerpt.
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # Delete the "|".
            # NOTE(review): the `-1` end bound also drops the line's final
            # character — confirm this is intentional against the full file.
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
def revert_tableborder(document):
    # Re-insert the "|" cell-border character removed by convert_tableborder.
    # NOTE(review): the loop index initialisation and increment are elided in
    # this excerpt.
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
def revert_armenian(document):
    # Revert Armenian support: switch armscii8 back to auto, add the armtex
    # package to the preamble and fall back to English as document language.
    # NOTE(review): several guard lines (`if i != -1:`, `if k == -1:`,
    # `else:`, loop index initialisation/increment) are elided in this
    # excerpt; indentation follows the visible text.

    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    while i < len(document.preamble):
        k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        k = document.preamble[i].find("%", 0, len(document.preamble[i]))
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    # NOTE(review): the `if i == -1:` / `else:` guards around the two header
    # updates appear elided in this excerpt.
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc in encodings:
        document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    " Revert preamble option \listings_params "
    # Move \listings_params from the header into an explicit \lstset in the
    # LaTeX preamble.
    # NOTE(review): the `if i != -1:` guard appears elided in this excerpt.
    i = find_token(document.header, "\\listings_params", 0)
    document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
    document.header.pop(i);
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
 lstparams "language=Delphi"
\begin_layout Standard
\begin_layout Standard
lstinline[language=Delphi]{var i = 10;}
There can be an caption inset in this inset
\begin_layout Standard
\begin_inset Caption
\begin_layout Standard
\begin_inset LatexCommand label
NOTE(review): large parts of this docstring example and its closing quotes,
plus many control-flow lines of the function body (search loop, guards,
list items of the generated ERT), are elided in this excerpt.
'''
    i = find_token(document.body, '\\begin_inset listings', i)
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # Parse the three inset parameter lines directly after \begin_inset.
    for line in range(i + 1, i + 4):
        if document.body[line].startswith('inline'):
            inline = document.body[line].split()[1]
        if document.body[line].startswith('lstparams'):
            params = document.body[line].split()[1].strip('"')
        if document.body[line].startswith('status'):
            status = document.body[line].split()[1].strip()
    # Extract an optional caption inset and its label.
    cap = find_token(document.body, '\\begin_inset Caption', i)
    cap_end = find_end_of_inset(document.body, cap + 1)
    # this should not happen
    lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
    lbl_end = find_end_of_inset(document.body, lbl + 1)
    # this should not happen
    for line in document.body[lbl : lbl_end + 1]:
        if line.startswith('name '):
            label = line.split()[1].strip('"')
    for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
        if not line.startswith('\\'):
            caption += line.strip()
    # looking for the oneline code for lstinline
    inlinecode = document.body[find_end_of_layout(document.body,
        find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
    # Fold caption/label back into the listings parameter string.
    if len(caption) > 0:
        if len(params) == 0:
            params = 'caption={%s}' % caption
            params += ',caption={%s}' % caption
        if len(params) == 0:
            params = 'label={%s}' % label
            params += ',label={%s}' % label
        params = '[%s]' % params
    params = params.replace('\\', '\\backslash\n')
    # Replace the inset with equivalent ERT.
    if inline == 'true':
        document.body[i:(j+1)] = [r'\begin_inset ERT',
                                  'status %s' % status,
                                  r'\begin_layout %s' % document.default_layout,
                                  'lstinline%s{%s}' % (params, inlinecode),
        document.body[i: j+1] = [r'\begin_inset ERT',
                                 'status %s' % status,
                                 r'\begin_layout %s' % document.default_layout,
                                 r'begin{lstlisting}%s' % params,
                                 ] + document.body[k : j - 1] + \
                                 r'\begin_layout %s' % document.default_layout,
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
\begin_layout Standard
lstinputlisting{file}[opt]
NOTE(review): parts of this docstring example, its closing quotes and several
control-flow lines of the function body are elided in this excerpt.
'''
    i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # find command line lstinputlisting{file}[options]
    cmd, file, option = '', '', ''
    if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
        cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
    option = option.replace('\\', '\\backslash\n')
    # Replace the Include inset with equivalent ERT.
    document.body[i : j + 1] = [r'\begin_inset ERT',
                                r'\begin_layout %s' % document.default_layout,
                                '%s%s{%s}' % (cmd, option, file),
def revert_ext_font_sizes(document):
    # For extarticle & friends: move the font size from \paperfontsize back
    # into the class options, where older formats stored it.
    # NOTE(review): the `if i == -1:` / `else:` branching around the last
    # four lines appears elided in this excerpt.
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    # No \options line: insert one right after \textclass ...
    i = find_token(document.header, '\\textclass', 0) + 1
    document.header[i:i] = ['\\options %s' % fontsize]
    # ... otherwise append the size to the existing options.
    document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    # For extarticle & friends: move the font size from the class options
    # into \paperfontsize.
    # NOTE(review): several lines (`break`, `return`, the body of the inner
    # `if`, and the `if options:`/`else:` guards at the end) appear elided in
    # this excerpt; indentation follows the visible text.
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
    else: # this else will only be attained if the for cycle had no match

    # Remove the size from the options list and remember it.
    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    # Rewrite (or drop, if now empty) the \options line.
    document.header[i] = '\\options %s' % ','.join(options)
    del document.header[i]
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
\begin_layout --Separator--
\begin_layout Standard
\begin_inset Note Note
\begin_layout Standard
NOTE(review): parts of this docstring example, its closing quotes and several
control-flow lines of the function body are elided in this excerpt.
'''
    i = find_token(document.body, r'\begin_layout --Separator--', i)
    j = find_end_of_layout(document.body, i + 1)
    # this should not happen
    # Replace the separator layout with a Note inset holding its content.
    document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                r'\begin_inset Note Note',
                                r'\begin_layout %s' % document.default_layout,
                                'Separate Environment',
                                ] + \
        document.body[ i + 1 : j] + \
def convert_arabic (document):
    # Rename language "arabic" to "arabic_arabtex" in the header and in
    # every \lang command in the body.
    # NOTE(review): the loop index initialisation/increment and the
    # `if h != -1:` guard appear elided in this excerpt.
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language arabic_arabtex"
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        # change the language name
        document.body[i] = '\lang arabic_arabtex'
def revert_arabic (document):
    # Rename language "arabic_arabtex" back to "arabic" in the header and in
    # every \lang command in the body (inverse of convert_arabic).
    # NOTE(review): the loop index initialisation/increment and the
    # `if h != -1:` guard appear elided in this excerpt.
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language arabic"
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        # change the language name
        document.body[i] = '\lang arabic'
supported_versions = ["1.5.0","1.5"]
# Conversion chains: each entry is [target_format, [step functions]].
# NOTE(review): several steps (and the `revert = [` opener plus the closing
# bracket of `convert`) appear elided in this excerpt — confirm against the
# full file.
convert = [[246, []],
           [247, [convert_font_settings]],
           [249, [convert_utf8]],
           [252, [convert_commandparams, convert_bibitem]],
           [254, [convert_esint]],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [261, [convert_changes]],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [271, [convert_ext_font_sizes]],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]

           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [259, [revert_utf8x]],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]
2083 if __name__ == "__main__":