lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
# Lookup tables used by convert_font_settings() and revert_font_settings().
# The keys are the values of the old `\fontscheme' header setting; the value
# is what is written to the new per-family header setting for that scheme.

# old \fontscheme value -> \font_roman value
roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
                    'times'   : 'times',   'palatino' : 'palatino',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'newcent', 'bookman'  : 'bookman',
                    'pslatex' : 'times'}
# old \fontscheme value -> \font_sans value
sans_fonts       = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'helvet',  'avant'    : 'avant',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'helvet'}
# old \fontscheme value -> \font_typewriter value
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
Files with an inputencoding of "auto" or "default" and multiple languages
where at least two languages have different default encodings are encoded
in multiple encodings for file formats < 249. These files are incorrectly
read and written (as if the whole file was in the encoding of the main
language).
This is not true for files written by CJK-LyX, they are always in the locale
encoding.

This function
- converts from fake unicode values to true unicode if forward is true, and
- converts from true unicode values to fake unicode if forward is false.
document.encoding must be set to the old value (format 248) in both cases.

We do this here and not in LyX.py because it is far easier to do the
necessary parsing in modern formats than in ancient ones.
"""
    # Inset types whose layouts start with the encoding of the document
    # language instead of inheriting the enclosing encoding (see the
    # \begin_layout branch below).
    inset_types = ["Foot", "Note"]
    # Nothing to do when a CJK encoding is set
    if document.cjk_encoding != '':
        return
    # The last element is the encoding in effect for the current line;
    # one entry is pushed per open layout.
    encoding_stack = [document.encoding]
    # Types of the currently open insets, innermost last
    insets = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward: # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()

    if document.inputencoding == "auto" or document.inputencoding == "default":
        i = 0
        while i < len(document.body):
            result = lang_re.match(document.body[i])
            if result:
                # A \lang line changes the encoding of the current layout
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    # presumably element 3 of a lang entry is the default
                    # encoding of that language -- confirm in lyx2lyx_lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                else:
                    # Other layouts inherit the encoding currently in effect
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                if inset_result:
                    insets.append(inset_result.group(1))
                else:
                    # Inset without a type: push a placeholder so that the
                    # matching \end_inset pops the right entry
                    insets.append("")
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                # NOTE(review): raises IndexError if no inset is open
                del insets[-1]
            # Only lines whose encoding differs from the document encoding
            # need to be recoded.
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    try:
                        # Convert unicode to the 8bit string that will be written
                        # to the file with the correct encoding.
                        orig = document.body[i].encode(encoding_stack[-1])
                        # Convert the 8bit string that will be written to the
                        # file to fake unicode with the encoding that will later
                        # be used when writing to the file.
                        document.body[i] = orig.decode(document.encoding)
                    except:
                        # NOTE(review): bare except; any failure falls back to
                        # replacing the characters of the line one by one.
                        mod_line = revert_unicode_line(document, i, insets, spec_chars)
                        document.body[i:i+1] = mod_line.split('\n')
                        # Skip the lines inserted by the replacement
                        i += len(mod_line.split('\n')) - 1
            i += 1
 317
 318
 319 def convert_utf8(document):
 320     " Set document encoding to UTF-8. "
 321     convert_multiencoding(document, True)
 322     document.encoding = "utf8"
 323
 324
 325 def revert_utf8(document):
 326     " Set document encoding to the value corresponding to inputencoding. "
 327     i = find_token(document.header, "\\inputencoding", 0)
 328     if i == -1:
 329         document.header.append("\\inputencoding auto")
 330     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 331         document.header[i] = "\\inputencoding auto"
 332     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 333     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 334     convert_multiencoding(document, False)
 335
 336
 337 def read_unicodesymbols():
 338     " Read the unicodesymbols list of unicode characters and corresponding commands."
 339     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 340     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
 341     spec_chars = {}
 342     for line in fp.readlines():
 343         if line[0] != '#':
 344             line=line.replace(' "',' ') # remove all quotation marks with spaces before
 345             line=line.replace('" ',' ') # remove all quotation marks with spaces after
 346             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
 347             try:
 348                 # flag1 and flag2 are preamble and other flags
 349                 [ucs4,command,flag1,flag2] =line.split(None,3)
 350                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
 351             except:
 352                 pass
 353     fp.close()
 354     return spec_chars
 355
 356
 357 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
 358     # Define strings to start and end ERT and math insets
 359     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
 360     ert_outro='\n\\end_layout\n\n\\end_inset\n'
 361     math_intro='\n\\begin_inset Formula $'
 362     math_outro='$\n\\end_inset'
 363
 364     mod_line = u''
 365     if i and document.body[i - 1][:1] != '\\':
 366         last_char = document.body[i - 1][-1:]
 367     else:
 368         last_char = ''
 369
 370     line = document.body[i]
 371     for character in line:
 372         try:
 373             # Try to write the character
 374             dummy = character.encode(document.encoding)
 375             mod_line += character
 376             last_char = character
 377         except:
 378             # Try to replace with ERT/math inset
 379             if spec_chars.has_key(character):
 380                 command = spec_chars[character][0] # the command to replace unicode
 381                 flag1 = spec_chars[character][1]
 382                 flag2 = spec_chars[character][2]
 383                 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
 384                     # We have a character that should be combined with the previous
 385                     command += '{' + last_char + '}'
 386                     # Remove the last character. Ignore if it is whitespace
 387                     if len(last_char.rstrip()):
 388                         # last_char was found and is not whitespace
 389                         if mod_line:
 390                             mod_line = mod_line[:-1]
 391                         else: # last_char belongs to the last line
 392                             document.body[i-1] = document.body[i-1][:-1]
 393                     else:
 394                         # The last character was replaced by a command. For now it is
 395                         # ignored. This could be handled better.
 396                         pass
 397                 if command[0:2] == '\\\\':
 398                     if command[2:12]=='ensuremath':
 399                         if insets and insets[-1] == "ERT":
 400                             # math in ERT
 401                             command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
 402                             command = command.replace('}', '$\n')
 403                         elif not insets or insets[-1] != "Formula":
 404                             # add a math inset with the replacement character
 405                             command = command.replace('\\\\ensuremath{\\', math_intro)
 406                             command = command.replace('}', math_outro)
 407                         else:
 408                             # we are already in a math inset
 409                             command = command.replace('\\\\ensuremath{\\', '')
 410                             command = command.replace('}', '')
 411                     else:
 412                         if insets and insets[-1] == "Formula":
 413                             # avoid putting an ERT in a math; instead put command as text
 414                             command = command.replace('\\\\', '\mathrm{')
 415                             command = command + '}'
 416                         elif not insets or insets[-1] != "ERT":
 417                             # add an ERT inset with the replacement character
 418                             command = command.replace('\\\\', ert_intro)
 419                             command = command + ert_outro
 420                         else:
 421                             command = command.replace('\\\\', '\n\\backslash\n')
 422                     last_char = '' # indicate that the character should not be removed
 423                 mod_line += command
 424             else:
 425                 # Replace with replacement string
 426                 mod_line += replacement_character
 427     return mod_line
 428
 429
 430 def revert_unicode(document):
 431     '''Transform unicode characters that can not be written using the
 432 document encoding to commands according to the unicodesymbols
 433 file. Characters that can not be replaced by commands are replaced by
 434 an replacement string.  Flags other than 'combined' are currently not
 435 implemented.'''
 436     spec_chars = read_unicodesymbols()
 437     insets = [] # list of active insets
 438
 439     # Go through the document to capture all combining characters
 440     i = 0
 441     while i < len(document.body):
 442         line = document.body[i]
 443         # Check for insets
 444         if line.find('\\begin_inset') > -1:
 445             insets.append(line[13:].split()[0])
 446         if line.find('\\end_inset') > -1:
 447             del insets[-1]
 448
 449         # Try to write the line
 450         try:
 451             # If all goes well the line is written here
 452             dummy = line.encode(document.encoding)
 453             i += 1
 454         except:
 455             # Error, some character(s) in the line need to be replaced
 456             mod_line = revert_unicode_line(document, i, insets, spec_chars)
 457             document.body[i:i+1] = mod_line.split('\n')
 458             i += len(mod_line.split('\n'))
 459
 460
 461 def revert_cs_label(document):
 462     " Remove status flag of charstyle label. "
 463     i = 0
 464     while 1:
 465         i = find_token(document.body, "\\begin_inset CharStyle", i)
 466         if i == -1:
 467             return
 468         # Seach for a line starting 'show_label'
 469         # If it is not there, break with a warning message
 470         i = i + 1
 471         while 1:
 472             if (document.body[i][:10] == "show_label"):
 473                 del document.body[i]
 474                 break
 475             elif (document.body[i][:13] == "\\begin_layout"):
 476                 document.warning("Malformed LyX document: Missing 'show_label'.")
 477                 break
 478             i = i + 1
 479
 480         i = i + 1
 481
 482
 483 def convert_bibitem(document):
 484     """ Convert
 485 \bibitem [option]{argument}
 486
 487 to
 488
 489 \begin_inset LatexCommand bibitem
 490 label "option"
 491 key "argument"
 492
 493 \end_inset
 494
 495 This must be called after convert_commandparams.
 496 """
 497     i = 0
 498     while 1:
 499         i = find_token(document.body, "\\bibitem", i)
 500         if i == -1:
 501             break
 502         j = document.body[i].find('[') + 1
 503         k = document.body[i].rfind(']')
 504         if j == 0: # No optional argument found
 505             option = None
 506         else:
 507             option = document.body[i][j:k]
 508         j = document.body[i].rfind('{') + 1
 509         k = document.body[i].rfind('}')
 510         argument = document.body[i][j:k]
 511         lines = ['\\begin_inset LatexCommand bibitem']
 512         if option != None:
 513             lines.append('label "%s"' % option.replace('"', '\\"'))
 514         lines.append('key "%s"' % argument.replace('"', '\\"'))
 515         lines.append('')
 516         lines.append('\\end_inset')
 517         document.body[i:i+1] = lines
 518         i = i + 1
 519
 520
# Parameter names of the command insets, used by convert_commandparams()
# and revert_commandparams().  Each entry lists the names under which the
# first option, the second option and the argument of the command are
# stored.  An empty name means that the command does not take that
# parameter: convert_commandparams() ignores such a value with a warning.
commandparams_info = {
    # command : [option1, option2, argument]
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "cite"        : ["after", "before", "key"],
    "citet"       : ["after", "before", "key"],
    "citep"       : ["after", "before", "key"],
    "citealt"     : ["after", "before", "key"],
    "citealp"     : ["after", "before", "key"],
    "citeauthor"  : ["after", "before", "key"],
    "citeyear"    : ["after", "before", "key"],
    "citeyearpar" : ["after", "before", "key"],
    "citet*"      : ["after", "before", "key"],
    "citep*"      : ["after", "before", "key"],
    "citealt*"    : ["after", "before", "key"],
    "citealp*"    : ["after", "before", "key"],
    "citeauthor*" : ["after", "before", "key"],
    "Citet"       : ["after", "before", "key"],
    "Citep"       : ["after", "before", "key"],
    "Citealt"     : ["after", "before", "key"],
    "Citealp"     : ["after", "before", "key"],
    "Citeauthor"  : ["after", "before", "key"],
    "Citet*"      : ["after", "before", "key"],
    "Citep*"      : ["after", "before", "key"],
    "Citealt*"    : ["after", "before", "key"],
    "Citealp*"    : ["after", "before", "key"],
    "Citeauthor*" : ["after", "before", "key"],
    "citefield"   : ["after", "before", "key"],
    "citetitle"   : ["after", "before", "key"],
    "cite*"       : ["after", "before", "key"],
    "hfill" : ["", "", ""],
    "index"      : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "eqref"     : ["name", "", "reference"],
    "pageref"   : ["name", "", "reference"],
    "prettyref" : ["name", "", "reference"],
    "ref"       : ["name", "", "reference"],
    "vpageref"  : ["name", "", "reference"],
    "vref"      : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url"     : ["name", "", "target"]}
 564
 565
def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # Everything after "\begin_inset LatexCommand " (25 characters plus
        # one separator) is the command text
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            # Fold the lines up to \end_inset into the command text and
            # remove them from the body
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        # Parser state: one of "WS", "CMDNAME", "OPTION", "SECOPTION", "CONTENT"
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        # Previous character; a `[' directly after a `]' starts the second option
        b = 0
        for c in command:
            # The checks below are plain `if's on purpose: a state change
            # made by one of them is seen by the following ones while the
            # same character is processed.
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            # Collect the character in the part selected by the state
            if state == "CMDNAME":
                    name += c
            elif state == "OPTION":
                    option1 += c
            elif state == "SECOPTION":
                    option2 += c
            elif state == "CONTENT":
                    argument += c
            elif state == "WS":
                # The delimiter itself is not stored; it only selects the
                # state for the following characters
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        # NOTE(review): commandparams_info[name] raises KeyError if a
        # non-empty parameter belongs to a command missing from the table.
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
 669
 670
 671 def revert_commandparams(document):
 672     regex = re.compile(r'(\S+)\s+(.+)')
 673     i = 0
 674     while 1:
 675         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 676         if i == -1:
 677             break
 678         name = document.body[i].split()[2]
 679         j = find_end_of_inset(document.body, i + 1)
 680         preview_line = ""
 681         option1 = ""
 682         option2 = ""
 683         argument = ""
 684         for k in range(i + 1, j):
 685             match = re.match(regex, document.body[k])
 686             if match:
 687                 pname = match.group(1)
 688                 pvalue = match.group(2)
 689                 if pname == "preview":
 690                     preview_line = document.body[k]
 691                 elif (commandparams_info[name][0] != "" and
 692                       pname == commandparams_info[name][0]):
 693                     option1 = pvalue.strip('"').replace('\\"', '"')
 694                 elif (commandparams_info[name][1] != "" and
 695                       pname == commandparams_info[name][1]):
 696                     option2 = pvalue.strip('"').replace('\\"', '"')
 697                 elif (commandparams_info[name][2] != "" and
 698                       pname == commandparams_info[name][2]):
 699                     argument = pvalue.strip('"').replace('\\"', '"')
 700             elif document.body[k].strip() != "":
 701                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 702         if name == "bibitem":
 703             if option1 == "":
 704                 lines = ["\\bibitem {%s}" % argument]
 705             else:
 706                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 707         else:
 708             if option1 == "":
 709                 if option2 == "":
 710                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 711                 else:
 712                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 713             else:
 714                 if option2 == "":
 715                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 716                 else:
 717                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 718         if name != "bibitem":
 719             if preview_line != "":
 720                 lines.append(preview_line)
 721             lines.append('')
 722             lines.append('\\end_inset')
 723         document.body[i:j+1] = lines
 724         i = j + 1
 725
 726
 727 def revert_nomenclature(document):
 728     " Convert nomenclature entry to ERT. "
 729     regex = re.compile(r'(\S+)\s+(.+)')
 730     i = 0
 731     use_nomencl = 0
 732     while 1:
 733         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 734         if i == -1:
 735             break
 736         use_nomencl = 1
 737         j = find_end_of_inset(document.body, i + 1)
 738         preview_line = ""
 739         symbol = ""
 740         description = ""
 741         prefix = ""
 742         for k in range(i + 1, j):
 743             match = re.match(regex, document.body[k])
 744             if match:
 745                 name = match.group(1)
 746                 value = match.group(2)
 747                 if name == "preview":
 748                     preview_line = document.body[k]
 749                 elif name == "symbol":
 750                     symbol = value.strip('"').replace('\\"', '"')
 751                 elif name == "description":
 752                     description = value.strip('"').replace('\\"', '"')
 753                 elif name == "prefix":
 754                     prefix = value.strip('"').replace('\\"', '"')
 755             elif document.body[k].strip() != "":
 756                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 757         if prefix == "":
 758             command = 'nomenclature{%s}{%s}' % (symbol, description)
 759         else:
 760             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 761         document.body[i:j+1] = ['\\begin_inset ERT',
 762                                 'status collapsed',
 763                                 '',
 764                                 '\\begin_layout %s' % document.default_layout,
 765                                 '',
 766                                 '',
 767                                 '\\backslash',
 768                                 command,
 769                                 '\\end_layout',
 770                                 '',
 771                                 '\\end_inset']
 772         i = i + 11
 773     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 774         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 775         document.preamble.append('\\makenomenclature')
 776
 777
 778 def revert_printnomenclature(document):
 779     " Convert printnomenclature to ERT. "
 780     regex = re.compile(r'(\S+)\s+(.+)')
 781     i = 0
 782     use_nomencl = 0
 783     while 1:
 784         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 785         if i == -1:
 786             break
 787         use_nomencl = 1
 788         j = find_end_of_inset(document.body, i + 1)
 789         preview_line = ""
 790         labelwidth = ""
 791         for k in range(i + 1, j):
 792             match = re.match(regex, document.body[k])
 793             if match:
 794                 name = match.group(1)
 795                 value = match.group(2)
 796                 if name == "preview":
 797                     preview_line = document.body[k]
 798                 elif name == "labelwidth":
 799                     labelwidth = value.strip('"').replace('\\"', '"')
 800             elif document.body[k].strip() != "":
 801                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 802         if labelwidth == "":
 803             command = 'nomenclature{}'
 804         else:
 805             command = 'nomenclature[%s]' % labelwidth
 806         document.body[i:j+1] = ['\\begin_inset ERT',
 807                                 'status collapsed',
 808                                 '',
 809                                 '\\begin_layout %s' % document.default_layout,
 810                                 '',
 811                                 '',
 812                                 '\\backslash',
 813                                 command,
 814                                 '\\end_layout',
 815                                 '',
 816                                 '\\end_inset']
 817         i = i + 11
 818     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 819         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 820         document.preamble.append('\\makenomenclature')
 821
 822
 823 def convert_esint(document):
 824     " Add \\use_esint setting to header. "
 825     i = find_token(document.header, "\\cite_engine", 0)
 826     if i == -1:
 827         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 828         return
 829     # 0 is off, 1 is auto, 2 is on.
 830     document.header.insert(i, '\\use_esint 0')
 831
 832
 833 def revert_esint(document):
 834     " Remove \\use_esint setting from header. "
 835     i = find_token(document.header, "\\use_esint", 0)
 836     if i == -1:
 837         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 838         return
 839     use_esint = document.header[i].split()[1]
 840     del document.header[i]
 841     # 0 is off, 1 is auto, 2 is on.
 842     if (use_esint == 2):
 843         document.preamble.append('\\usepackage{esint}')
 844
 845
 846 def revert_clearpage(document):
 847     " clearpage -> ERT "
 848     i = 0
 849     while 1:
 850         i = find_token(document.body, "\\clearpage", i)
 851         if i == -1:
 852             break
 853         document.body[i:i+1] =  ['\\begin_inset ERT',
 854                                 'status collapsed',
 855                                 '',
 856                                 '\\begin_layout %s' % document.default_layout,
 857                                 '',
 858                                 '',
 859                                 '\\backslash',
 860                                 'clearpage',
 861                                 '\\end_layout',
 862                                 '',
 863                                 '\\end_inset']
 864     i = i + 1
 865
 866
 867 def revert_cleardoublepage(document):
 868     " cleardoublepage -> ERT "
 869     i = 0
 870     while 1:
 871         i = find_token(document.body, "\\cleardoublepage", i)
 872         if i == -1:
 873             break
 874         document.body[i:i+1] =  ['\\begin_inset ERT',
 875                                 'status collapsed',
 876                                 '',
 877                                 '\\begin_layout %s' % document.default_layout,
 878                                 '',
 879                                 '',
 880                                 '\\backslash',
 881                                 'cleardoublepage',
 882                                 '\\end_layout',
 883                                 '',
 884                                 '\\end_inset']
 885     i = i + 1
 886
 887
 888 def convert_lyxline(document):
 889     " remove fontsize commands for \lyxline "
 890     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 891     # to change the line thickness. The new definiton does this so that imported
 892     # \lyxlines would have a different line thickness. The eventual fontsize command
 893     # before \lyxline is therefore removed to get the same output.
 894     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 895                  "large", "Large", "LARGE", "huge", "Huge"]
 896     for n in range(0, len(fontsizes)):
 897         i = 0
 898         k = 0
 899         while i < len(document.body):
 900             i = find_token(document.body, "\\size " + fontsizes[n], i)
 901             k = find_token(document.body, "\\lyxline", i)
 902             # the corresponding fontsize command is always 2 lines before the \lyxline
 903             if (i != -1 and k == i+2):
 904                 document.body[i:i+1] = []
 905             else:
 906                 break
 907         i = i + 1
 908
 909
 910 def revert_encodings(document):
 911     " Set new encodings to auto. "
 912     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 913                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 914                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 915     i = find_token(document.header, "\\inputencoding", 0)
 916     if i == -1:
 917         document.header.append("\\inputencoding auto")
 918     else:
 919         inputenc = get_value(document.header, "\\inputencoding", i)
 920         if inputenc in encodings:
 921             document.header[i] = "\\inputencoding auto"
 922     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 923
 924
 925 def convert_caption(document):
 926     " Convert caption layouts to caption insets. "
 927     i = 0
 928     while 1:
 929         i = find_token(document.body, "\\begin_layout Caption", i)
 930         if i == -1:
 931             return
 932         j = find_end_of_layout(document.body, i)
 933         if j == -1:
 934             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 935             return
 936
 937         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 938         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 939                             "\\begin_inset Caption", "",
 940                             "\\begin_layout %s" % document.default_layout]
 941         i = i + 1
 942
 943
def revert_caption(document):
    """Convert caption insets to caption layouts.

    Each `\\begin_inset Caption' is turned into a `\\begin_layout Caption'
    paragraph.  The paragraph that contained the inset is either removed
    (when only empty lines surround the inset) or split around the new
    caption paragraph.  document.body is edited in place; the function
    returns early with a warning when an expected token is missing.
    """
    " This assumes that the text class has a caption style. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the enclosing layout line; it is re-emitted below if the
        # paragraph continues after the inset.
        layout_line = document.body[layout_before]
        del_layout_before = True
        # Any non-empty line between \begin_layout and the inset means the
        # inset is not the first thing in the paragraph.
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Drop the \begin_layout line and the empty lines up to the
            # inset; the inset line is now at layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # End the preceding paragraph right before the inset; the inset
            # line moves down by the two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        del_layout_after = True
        # Any non-empty line between \end_inset and \end_layout means the
        # paragraph continues after the inset.
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            # Remove everything after \end_inset up to and including the
            # \end_layout of the enclosing paragraph.
            del document.body[k+1:layout_after+1]
        else:
            # Reopen the enclosing layout for the text after the inset
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # k is handled before j because k > j: deleting at k does not move
        # the line at j.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1016
1017
# Accents of InsetLaTeXAccent: LaTeX accent command (without the leading
# backslash) -> unicode combining character.
accent_map = {
    "`": u'\u0300',  # COMBINING GRAVE ACCENT
    "'": u'\u0301',  # COMBINING ACUTE ACCENT
    "^": u'\u0302',  # COMBINING CIRCUMFLEX ACCENT
    "~": u'\u0303',  # COMBINING TILDE
    "=": u'\u0304',  # COMBINING MACRON
    "u": u'\u0306',  # COMBINING BREVE
    ".": u'\u0307',  # COMBINING DOT ABOVE
    '"': u'\u0308',  # COMBINING DIAERESIS
    "r": u'\u030a',  # COMBINING RING ABOVE
    "H": u'\u030b',  # COMBINING DOUBLE ACUTE ACCENT
    "v": u'\u030c',  # COMBINING CARON
    "b": u'\u0320',  # COMBINING MINUS SIGN BELOW
    "d": u'\u0323',  # COMBINING DOT BELOW
    "c": u'\u0327',  # COMBINING CEDILLA
    "k": u'\u0328',  # COMBINING OGONEK
    # The tie is special: it spans two characters, but only one is given
    # as argument, so it needs no different treatment.
    "t": u'\u0361',  # COMBINING DOUBLE INVERTED BREVE
}


# Special accents of InsetLaTeXAccent that take no argument:
# command (without the leading backslash) -> resulting character.
special_accent_map = {
    "i": u'\u0131',  # LATIN SMALL LETTER DOTLESS I
    "j": u'\u0237',  # LATIN SMALL LETTER DOTLESS J
    "l": u'\u0142',  # LATIN SMALL LETTER L WITH STROKE
    "L": u'\u0141',  # LATIN CAPITAL LETTER L WITH STROKE
}


# Special accent arguments of InsetLaTeXAccent:
# LaTeX command used as accented character -> unicode character.
accented_map = {
    "\\i": u'\u0131',  # LATIN SMALL LETTER DOTLESS I
    "\\j": u'\u0237',  # LATIN SMALL LETTER DOTLESS J
}
1055
1056
def _convert_accent(accent, accented_char):
    """Return the unicode string for an accent applied to a character.

    accent is the accent command without backslash, accented_char its
    argument (may be empty).  The result is NFC-normalized.  An empty
    string is returned when the combination cannot be converted.
    """
    kind = accent
    base = accented_char
    if base == '':
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = ' '
    elif kind == 'q' and base in ('t', 'd', 'l', 'L'):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = 'v'
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # We can only convert accents on a single char
        return ''
    combining = accent_map.get(kind)
    if not combining:
        return ''
    return unicodedata.normalize("NFC", "%s%s" % (base, combining))
1082
1083
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append the text ert to body[i] as valid ERT code.

    A backslash is written as `\backslash ' and starts a new body line; a
    '\n' ends the current paragraph and opens a new one of layout
    default_layout.  All other characters are appended unchanged.  body is
    modified in place.  Returns the (maybe incremented) index of the line
    that received the last character.
    """
    line = i
    for ch in ert:
        if ch == '\n':
            paragraph_break = ['\\end_layout', '',
                               '\\begin_layout %s' % default_layout, '']
            body[line+1:line+1] = paragraph_break
            line += 4
        elif ch == '\\':
            body[line] += '\\backslash '
            line += 1
            body.insert(line, '')
        else:
            body[line] += ch
    return line
1100
1101
def convert_accent(document):
    """Convert InsetLaTeXAccent insets to unicode characters.

    An inset that _convert_accent() can translate is replaced by the
    resulting character; any other inset is turned into an ERT inset with
    the accent command as raw LaTeX, and a warning is issued.
    document.body is edited in place.
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a'  (also accepted)
    # '\i \"'    (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument has an unexpected form (e.g. nested braces
                # or text after the closing brace).  Keep it verbatim for
                # the ERT inset instead of failing on the missing match.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # The ERT text is appended to the last of the seven inserted lines
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
1151
1152
def revert_accent(document):
    """Convert accented unicode characters back to InsetLaTeXAccent.

    Only characters that cannot be encoded in the encoding that is in
    effect at their position are replaced by an `\\i \\<accent>{<char>}'
    inset; everything else is kept.  The body is decomposed (NFD) for the
    search and recomposed (NFC) at the end.  document.body is edited in
    place.
    """
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map and document.body[i][:1] != '\\'):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")

    i = 0
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            # Each of these control lines is skipped; i must be advanced
            # before `continue', otherwise the loop never terminates.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i = i + 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i = i + 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i = i + 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character.  It is a single
                    # character at position j, so everything before it
                    # has to be kept.
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters: the base character
                    # at j-1 and the combining accent at j
                    if j > 1:
                        document.body[i] = document.body[i][:j-1]
                    else:
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
1263
1264
def normalize_font_whitespace_259(document):
    """Move whitespace at the border of a font change outside of it.

    Before format 259 a font change was ignored if a whitespace was the
    first or last character of the changed sequence.  This handles the
    series, emph, color, shape, bar and family properties.
    """
    return normalize_font_whitespace(document, {
        "\\series": "default",
        "\\emph": "default",
        "\\color": "none",
        "\\shape": "default",
        "\\bar": "default",
        "\\family": "default",
    })
1277
def normalize_font_whitespace_274(document):
    """Move whitespace at the border of a language change outside of it.

    Before format 259 (sic) a font change was ignored if a whitespace was
    the first or last character of the changed sequence.  Format 259
    corrected this for most font properties but forgot the language, so
    the same conversion is applied here to font language changes.
    """
    language_only = {"\\lang": "default"}
    return normalize_font_whitespace(document, language_only)
1288
def get_paragraph_language(document, i):
    """Return the language of the paragraph that contains body line i.

    If the first non-empty line of the paragraph is a \\lang command, its
    argument is the paragraph's language; otherwise the paragraph has the
    document's language.
    """
    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first_line = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_line].split()
    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
1306
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function.

    char_properties maps a property token (e.g. "\\series") to the value
    that resets it.  If it contains "\\lang", that entry is overwritten here
    with the language of each paragraph.  Nothing is done unless
    document.backend is "latex".  document.body is edited in place and
    nothing is returned."""

    if document.backend != "latex":
        return

    lines = document.body

    # Properties currently set to a non-default value: token -> value
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            # NOTE(review): for i == 0, lines[i-1] is the last line of the
            # body (negative index); presumably a font change never is the
            # first body line - confirm.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # added_lines is built as: resets of the other active
                # properties, the space, then the other properties set again
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                # Skip the lines that were just inserted
                i = i + len(added_lines)

            # NOTE(review): lines[i+1] and lines[i+2] are read without a
            # bounds check; presumably a font change is never one of the
            # last body lines - confirm.
            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = lines[i+1][1:]
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                # Skip the lines that were just inserted
                i = i + len(added_lines)

        i = i + 1
1396
1397
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        # Only the utf8x value is rewritten; every other encoding is kept.
        if get_value(header, "\\inputencoding", pos) == "utf8x":
            header[pos] = "\\inputencoding utf8"
    else:
        # The header has no encoding line at all: add one with the value auto.
        header.append("\\inputencoding auto")
    # Keep the attribute on the document in sync with the header line.
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1408
1409
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    token = "\\inputencoding"
    line_no = find_token(document.header, token, 0)
    if line_no == -1:
        # No encoding given in the header: fall back to auto.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, token, line_no) == "utf8-plain":
        document.header[line_no] = "\\inputencoding utf8"
    # Re-read the header so that the attribute reflects the final value.
    document.inputencoding = get_value(document.header, token, 0)
1420
1421
def revert_beamer_alert(document):
    """ Revert beamer's \\alert inset back to ERT.

    Every "\\begin_inset CharStyle Alert" line becomes "\\begin_inset ERT",
    and the line following the first \\begin_layout inside the inset is
    wrapped in "\\alert{...}".  A warning is issued, and the conversion
    stops, if no such layout line can be found before the end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        # Look for the first layout of the inset, never running past the
        # end of the body (a truncated inset used to raise IndexError here).
        i = i + 1
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find layout of Alert inset.")
            return
        # Insert the \alert command
        body[i + 1] = "\\alert{" + body[i + 1] + '}'
        i = i + 1
1439
1440
def revert_beamer_structure(document):
    """ Revert beamer's \\structure inset back to ERT.

    Every "\\begin_inset CharStyle Structure" line becomes
    "\\begin_inset ERT", and the line following the first \\begin_layout
    inside the inset is wrapped in "\\structure{...}".  A warning is issued,
    and the conversion stops, if no such layout line can be found before the
    end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        # Look for the first layout of the inset, never running past the
        # end of the body (a truncated inset used to raise IndexError here).
        i = i + 1
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find layout of Structure inset.")
            return
        # Insert the \structure command
        body[i + 1] = "\\structure{" + body[i + 1] + '}'
        i = i + 1
1457
1458
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    header = document.header
    track_pos = find_token(header, '\\tracking_changes', 0)
    if track_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_off = get_value(header, "\\tracking_changes", track_pos) == "false"
    output_on = get_value(header, "\\output_changes", output_pos) == "true"
    # Showing changes in the output makes no sense without change tracking.
    if tracking_off and output_on:
        header[output_pos] = "\\output_changes false"
1473
1474
def revert_ascii(document):
    " Set ascii encoding to auto. "
    where = find_token(document.header, "\\inputencoding", 0)
    found = where != -1
    if found and get_value(document.header, "\\inputencoding", where) == "ascii":
        document.header[where] = "\\inputencoding auto"
    if not found:
        # A header without any encoding line gets an explicit auto entry.
        document.header.append("\\inputencoding auto")
    # Mirror whatever the header says now in the document attribute.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1485
1486
def normalize_language_name(document):
    """ Rename the languages brazil and portuges to brazilian and portuguese.

    Updates document.language and, if present, the \\language header line.
    Documents in any other language are left untouched.
    """
    lang = { "brazil": "brazilian",
             "portuges": "portuguese"}

    if document.language not in lang:
        return
    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    # find_token yields -1 if the header has no \language line; writing to
    # header[-1] would silently overwrite the last header line instead.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
1495
1496
def revert_language_name(document):
    """ Rename the languages brazilian and portuguese back to brazil and portuges.

    Updates document.language and, if present, the \\language header line.
    Documents in any other language are left untouched.
    """
    lang = { "brazilian": "brazil",
             "portuguese": "portuges"}

    if document.language not in lang:
        return
    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    # find_token yields -1 if the header has no \language line; writing to
    # header[-1] would silently overwrite the last header line instead.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
1505
1506 #
1507 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " Rename the textclass cv to simplecv. "
    if document.textclass != "cv":
        # any other class keeps its name
        return
    document.textclass = "simplecv"
1511
1512
def revert_cv_textclass(document):
    " Rename the textclass simplecv back to cv. "
    if document.textclass != "simplecv":
        # any other class keeps its name
        return
    document.textclass = "cv"
1516
1517
1518 #
1519 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """ add scaleBeforeRotation graphics parameter.

    A Graphics inset that has a rotateAngle parameter together with a
    width, height or scale parameter gets a scaleBeforeRotation line
    inserted directly before its \\end_inset.  An inset whose end cannot be
    found is reported with a warning and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without a valid end the range [i + 1, j) below is meaningless,
            # so leave this inset alone and look for the next one.
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        angle_pos = find_token(document.body, "\trotateAngle", i + 1, j)
        size_pos = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if angle_pos != -1 and size_pos != -1:
            # Tab-prefixed like the other parameters of the inset; this is
            # the form revert_graphics_rotation looks for.
            document.body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
1538
1539
1540 #
1541 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """ remove scaleBeforeRotation graphics parameter.

    For each Graphics inset:
      - if it has a scaleBeforeRotation line, that line is removed;
      - otherwise, if it has rotateAngle and a width, height or scale
        parameter, the angle is moved into the special parameter
        ("angle=<value>", prepended to an existing special value) and the
        rotateAngle line is removed.
    An inset whose end cannot be found is reported with a warning and
    skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without a valid end the range [i + 1, j) below is meaningless.
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
            i = i + 1
            continue
        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
        special = get_value(document.body, 'special', i + 1, j)
        if rotateAngle != "":
            size_pos = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
            # An inset without any size parameter needs no change.  Only
            # this inset is skipped: a "break" here used to stop the whole
            # conversion and left all later insets unconverted.
            if size_pos != -1:
                if special == "":
                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                else:
                    special_pos = find_token(document.body, "\tspecial", i + 1, j)
                    # guard against a special line that is not tab-prefixed
                    if special_pos != -1:
                        document.body[special_pos] = document.body[special_pos].replace(
                            special, 'angle=%s,%s' % (rotateAngle, special))
                k = find_token(document.body, "\trotateAngle", i + 1, j)
                if k != -1:
                    del document.body[k]
        i = i + 1
1575
1576
1577
def convert_tableborder(document):
    """ Remove the "|" in front of ">{" from lines that also set leftline="true".

    The problem is: LyX doubles the table cell border as it ignores the "|"
    character in the cell arguments.  A fix takes care of this and therefore
    the "|" has to be removed.
    """
    for i, line in enumerate(document.body):
        h = line.find('leftline="true"')
        k = line.find("|>{")
        # the two tokens have to be in one line
        if h != -1 and k != -1:
            # delete the "|" only; the rest of the line is kept up to and
            # including its last character (the old slice dropped it)
            document.body[i] = line[:k] + line[k+1:]
1590
1591
def revert_tableborder(document):
    ' Put a "|" in front of the first ">{" of lines that set leftline="true". '
    body = document.body
    for n, text in enumerate(body):
        brace = text.find(">{")
        # the two tokens have to be in one line
        if brace != -1 and 'leftline="true"' in text:
            # add the "|"
            body[n] = text[:brace] + '|' + text[brace:]
1602
1603
def revert_armenian(document):
    " Set armscii8 encoding to auto and language armenian to english, loading armtex. "
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"

    # everything below only concerns armenian documents
    if document.language != "armenian":
        return

    # the preamble counts as existing if one of its lines contains a
    # backslash or a percent sign
    preamble_exists = False
    for entry in document.preamble:
        if "\\" in entry or "%" in entry:
            preamble_exists = True
            break

    # add the entry \usepackage{armtex} to the document preamble
    if preamble_exists:
        # set the armtex entry as the first preamble line
        document.preamble.insert(0, "\\usepackage{armtex}")
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
1634
1635
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    cjk_encodings = ("Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP")
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")

    header = document.header
    enc_pos = find_token(header, "\\inputencoding", 0)
    if enc_pos == -1:
        # no encoding line at all: add one with the value auto
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", enc_pos) in cjk_encodings:
        header[enc_pos] = "\\inputencoding default"
    # keep the attribute in sync with the header
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language in cjk_languages:
        document.language = "english"
        lang_pos = find_token(header, "\\language", 0)
        if lang_pos != -1:
            header[lang_pos] = "\\language english"
1656
1657
def revert_preamble_listings_params(document):
    r""" Revert preamble option \listings_params

    The header line is removed; its value (surrounding double quotes
    stripped) is passed to \lstset in the preamble, after a
    \usepackage{listings} line.  Nothing happens if the header has no
    \listings_params line.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Everything after the token is the value.  Splitting only once keeps
    # parameters that contain blanks intact and copes with a missing value.
    fields = document.header[i].split(None, 1)
    params = ''
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1665
1666
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are passed to listings as the
caption={...} and label={...} parameters.
'''
    # i is never advanced explicitly: each pass replaces the line at i by
    # "\begin_inset ERT", so the next search from i finds the next listings
    # inset.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        else:
            # make sure the listings package is loaded (added only once)
            if not '\\usepackage{listings}' in document.preamble:
                document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # defaults used when the corresponding line is missing
        inline = 'false'
        params = ''
        status = 'open'
        # first three lines
        # NOTE(review): only the first whitespace-separated word after
        # lstparams is kept, so parameters containing blanks look truncated
        # here -- confirm.
        # NOTE(review): k is assigned only when a status line is among these
        # three lines (or a caption is found below); otherwise the non-inline
        # branch uses an unset k or one left over from a previous pass.
        for line in range(i + 1, i + 4):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
            if document.body[line].startswith('lstparams'):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                # the listing content starts after the status line
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # NOTE(review): this search starts at i but is not limited to j, so
        # it presumably can match a Caption inset located after this listings
        # inset -- confirm.
        cap = find_token(document.body, '\\begin_inset Caption', i)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?
            # NOTE(review): likewise not limited to cap_end.
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                # no label: use an empty range at the end of the caption
                lbl = cap_end
                lbl_end = cap_end
            # the label name is the first word after "name", without quotes
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # the caption text is every line of the caption inset, outside
            # the label inset, that does not start with a backslash
            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            # the listing content starts after the caption inset
            k = cap_end + 1
        inlinecode = ''
        # looking for the oneline code for lstinline
        # (the line directly before the end of the first paragraph in the
        # default layout found after i)
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
        # append caption and label to the listings parameters
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            # every backslash becomes "\backslash" followed by a newline
            # inside the same list element
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            # inline listing: a single \lstinline command in ERT
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            # displayed listing: the original lines k .. j - 2 are kept
            # between a \begin{lstlisting} and an \end{lstlisting} paragraph
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout',
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1816
1817
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

\usepackage{listings} is added to the preamble if it is not there yet.
A warning is issued if the command cannot be parsed.
    '''
    # The file name may contain any character except the closing brace
    # (path separators, dashes, ...); whatever follows it is the option part.
    command_re = re.compile(r'\\(lstinputlisting){([^}]*)}(.*)')

    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if not '\\usepackage{listings}' in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]
        # Split only twice so that blanks inside the file name or the
        # options do not cut the command short.
        command = document.body[i].split(None, 2)[2].rstrip()
        cmd, filename, option = '', '', ''
        found = command_re.match(command)
        if found:
            cmd, filename, option = found.groups()
        else:
            document.warning("Malformed LyX document: Could not parse lstinputlisting command.")
        # every backslash becomes "\backslash" followed by a newline
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout %s' % document.default_layout,
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1868
1869
def revert_ext_font_sizes(document):
    " Turn \\paperfontsize 10/11/12 of the ext* classes into a class option. "
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return
    header = document.header

    size = get_value(header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    class_option = size + 'pt'

    # the size now travels as a class option, so reset the header setting
    size_pos = find_token(header, '\\paperfontsize', 0)
    header[size_pos] = '\\paperfontsize default'

    options_pos = find_token(header, '\\options', 0)
    if options_pos != -1:
        # extend the existing option list
        header[options_pos] += ',%s' % class_option
        return
    # no option line yet: create one right after the \textclass line
    insert_at = find_token(header, '\\textclass', 0) + 1
    header[insert_at:insert_at] = ['\\options %s' % class_option]
1887
1888
def convert_ext_font_sizes(document):
    " Turn a 10pt/11pt/12pt class option of the ext* classes into \\paperfontsize. "
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return
    header = document.header

    if get_value(header, '\\paperfontsize', 0) != 'default':
        return

    options_pos = find_token(header, '\\options', 0)
    if options_pos == -1:
        return

    known_sizes = ('10pt', '11pt', '12pt')
    remaining = get_value(header, '\\options', options_pos).split(',')

    # Pick the first option that is exactly one of the known sizes; an
    # option list without such an entry leaves the header untouched.
    chosen = None
    for candidate in remaining:
        if candidate in known_sizes:
            chosen = candidate
            break
    if chosen is None:
        return
    # removes the first occurrence, i.e. the entry found above
    remaining.remove(chosen)

    size_pos = find_token(header, '\\paperfontsize', 0)
    # strip the trailing "pt"
    header[size_pos] = '\\paperfontsize %s' % chosen[:-2]

    if remaining:
        header[options_pos] = '\\options %s' % ','.join(remaining)
    else:
        # the size was the only option: drop the whole line
        del header[options_pos]
1924
1925
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

    '''

    body = document.body
    start = 0
    while True:
        start = find_token(body, r'\begin_layout --Separator--', start)
        if start == -1:
            break
        end = find_end_of_layout(body, start + 1)
        if end == -1:
            # this should not happen
            break
        # paragraph in the default layout that opens with the note
        opening = [r'\begin_layout %s' % document.default_layout,
                   r'\begin_inset Note Note',
                   'status open',
                   '',
                   r'\begin_layout %s' % document.default_layout,
                   'Separate Environment',
                   r'\end_layout',
                   '',
                   r'\end_inset']
        # whatever the separator paragraph contained follows the note
        content = body[start + 1 : end]
        closing = ['',
                   r'\end_layout']
        body[start : end + 1] = opening + content + closing
1973
1974
def convert_arabic(document):
    """ Rename the language arabic to arabic_arabtex.

    Updates document.language, the \\language header line (if present) and
    every body line that contains "\\lang arabic".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    # The backslash is written as "\\": "\l" is not a valid escape sequence
    # and newer Python versions warn about it.
    for i, line in enumerate(document.body):
        if "\\lang arabic" in line:
            # change the language name (the whole line is replaced)
            document.body[i] = "\\lang arabic_arabtex"
1988
1989
def revert_arabic(document):
    """ Rename the language arabic_arabtex back to arabic.

    Updates document.language, the \\language header line (if present) and
    every body line that contains "\\lang arabic_arabtex".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    # The backslash is written as "\\": "\l" is not a valid escape sequence
    # and newer Python versions warn about it.
    for i, line in enumerate(document.body):
        if "\\lang arabic_arabtex" in line:
            # change the language name (the whole line is replaced)
            document.body[i] = "\\lang arabic"
2003
2004
2005 ##
2006 # Conversion hub
2007 #
2008
# Version strings this module declares as supported.
supported_versions = ["1.5.0","1.5"]
# Forward table: a list of [number, [functions]] pairs in ascending order
# (246 .. 276).  The numbers are presumably LyX file format numbers and the
# functions the steps needed to reach that format -- confirm against the
# code that reads this table.  An empty list means no function is
# registered for that number.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Backward table: same layout as above, in descending order (275 .. 245).
# NOTE(review): the three listings reverts are registered for both 271 and
# 268 -- confirm that this is intended.
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


# Running this file directly does nothing.
if __name__ == "__main__":
    pass