lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     inset_types = ["Foot", "Note"]
 250     if document.cjk_encoding != '':
 251         return
 252     encoding_stack = [document.encoding]
 253     insets = []
 254     lang_re = re.compile(r"^\\lang\s(\S+)")
 255     inset_re = re.compile(r"^\\begin_inset\s(\S+)")
 256     if not forward: # no need to read file unless we are reverting
 257         spec_chars = read_unicodesymbols()
 258
 259     if document.inputencoding == "auto" or document.inputencoding == "default":
 260         i = 0
 261         while i < len(document.body):
 262             result = lang_re.match(document.body[i])
 263             if result:
 264                 language = result.group(1)
 265                 if language == "default":
 266                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 267                     encoding_stack[-1] = document.encoding
 268                 else:
 269                     from lyx2lyx_lang import lang
 270                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 271                     encoding_stack[-1] = lang[language][3]
 272             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 273                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 274                 if len(insets) > 0 and insets[-1] in inset_types:
 275                     from lyx2lyx_lang import lang
 276                     encoding_stack.append(lang[document.language][3])
 277                 else:
 278                     encoding_stack.append(encoding_stack[-1])
 279             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 280                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 281                 if len(encoding_stack) == 1:
 282                     # Don't remove the document encoding from the stack
 283                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 284                 else:
 285                     del encoding_stack[-1]
 286             elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
 287                 inset_result = inset_re.match(document.body[i])
 288                 if inset_result:
 289                     insets.append(inset_result.group(1))
 290                 else:
 291                     insets.append("")
 292             elif find_token(document.body, "\\end_inset", i, i + 1) == i:
 293                 del insets[-1]
 294             if encoding_stack[-1] != document.encoding:
 295                 if forward:
 296                     # This line has been incorrectly interpreted as if it was
 297                     # encoded in 'encoding'.
 298                     # Convert back to the 8bit string that was in the file.
 299                     orig = document.body[i].encode(document.encoding)
 300                     # Convert the 8bit string that was in the file to unicode
 301                     # with the correct encoding.
 302                     document.body[i] = orig.decode(encoding_stack[-1])
 303                 else:
 304                     try:
 305                         # Convert unicode to the 8bit string that will be written
 306                         # to the file with the correct encoding.
 307                         orig = document.body[i].encode(encoding_stack[-1])
 308                         # Convert the 8bit string that will be written to the
 309                         # file to fake unicode with the encoding that will later
 310                         # be used when writing to the file.
 311                         document.body[i] = orig.decode(document.encoding)
 312                     except:
 313                         mod_line = revert_unicode_line(document, i, insets, spec_chars)
 314                         document.body[i:i+1] = mod_line.split('\n')
 315                         i += len(mod_line.split('\n')) - 1
 316             i += 1
 317
 318
 319 def convert_utf8(document):
 320     " Set document encoding to UTF-8. "
 321     convert_multiencoding(document, True)
 322     document.encoding = "utf8"
 323
 324
 325 def revert_utf8(document):
 326     " Set document encoding to the value corresponding to inputencoding. "
 327     i = find_token(document.header, "\\inputencoding", 0)
 328     if i == -1:
 329         document.header.append("\\inputencoding auto")
 330     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 331         document.header[i] = "\\inputencoding auto"
 332     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 333     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 334     convert_multiencoding(document, False)
 335
 336
 337 def read_unicodesymbols():
 338     " Read the unicodesymbols list of unicode characters and corresponding commands."
 339     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 340     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
 341     spec_chars = {}
 342     for line in fp.readlines():
 343         if line[0] != '#':
 344             line=line.replace(' "',' ') # remove all quotation marks with spaces before
 345             line=line.replace('" ',' ') # remove all quotation marks with spaces after
 346             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
 347             try:
 348                 # flag1 and flag2 are preamble and other flags
 349                 [ucs4,command,flag1,flag2] =line.split(None,3)
 350                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
 351             except:
 352                 pass
 353     fp.close()
 354     return spec_chars
 355
 356
 357 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
 358     # Define strings to start and end ERT and math insets
 359     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
 360     ert_outro='\n\\end_layout\n\n\\end_inset\n'
 361     math_intro='\n\\begin_inset Formula $'
 362     math_outro='$\n\\end_inset'
 363
 364     mod_line = u''
 365     if i and not is_inset_line(document, i-1):
 366         last_char = document.body[i - 1][-1:]
 367     else:
 368         last_char = ''
 369
 370     line = document.body[i]
 371     for character in line:
 372         try:
 373             # Try to write the character
 374             dummy = character.encode(document.encoding)
 375             mod_line += character
 376             last_char = character
 377         except:
 378             # Try to replace with ERT/math inset
 379             if spec_chars.has_key(character):
 380                 command = spec_chars[character][0] # the command to replace unicode
 381                 flag1 = spec_chars[character][1]
 382                 flag2 = spec_chars[character][2]
 383                 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
 384                     # We have a character that should be combined with the previous
 385                     command += '{' + last_char + '}'
 386                     # Remove the last character. Ignore if it is whitespace
 387                     if len(last_char.rstrip()):
 388                         # last_char was found and is not whitespace
 389                         if mod_line:
 390                             mod_line = mod_line[:-1]
 391                         else: # last_char belongs to the last line
 392                             document.body[i-1] = document.body[i-1][:-1]
 393                     else:
 394                         # The last character was replaced by a command. For now it is
 395                         # ignored. This could be handled better.
 396                         pass
 397                 if command[0:2] == '\\\\':
 398                     if command[2:12]=='ensuremath':
 399                         if insets and insets[-1] == "ERT":
 400                             # math in ERT
 401                             command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
 402                             command = command.replace('}', '$\n')
 403                         elif not insets or insets[-1] != "Formula":
 404                             # add a math inset with the replacement character
 405                             command = command.replace('\\\\ensuremath{\\', math_intro)
 406                             command = command.replace('}', math_outro)
 407                         else:
 408                             # we are already in a math inset
 409                             command = command.replace('\\\\ensuremath{\\', '')
 410                             command = command.replace('}', '')
 411                     else:
 412                         if insets and insets[-1] == "Formula":
 413                             # avoid putting an ERT in a math; instead put command as text
 414                             command = command.replace('\\\\', '\mathrm{')
 415                             command = command + '}'
 416                         elif not insets or insets[-1] != "ERT":
 417                             # add an ERT inset with the replacement character
 418                             command = command.replace('\\\\', ert_intro)
 419                             command = command + ert_outro
 420                         else:
 421                             command = command.replace('\\\\', '\n\\backslash\n')
 422                     last_char = '' # indicate that the character should not be removed
 423                 mod_line += command
 424             else:
 425                 # Replace with replacement string
 426                 mod_line += replacement_character
 427     return mod_line
 428
 429
 430 def revert_unicode(document):
 431     '''Transform unicode characters that can not be written using the
 432 document encoding to commands according to the unicodesymbols
 433 file. Characters that can not be replaced by commands are replaced by
 434 an replacement string.  Flags other than 'combined' are currently not
 435 implemented.'''
 436     spec_chars = read_unicodesymbols()
 437     insets = [] # list of active insets
 438
 439     # Go through the document to capture all combining characters
 440     i = 0
 441     while i < len(document.body):
 442         line = document.body[i]
 443         # Check for insets
 444         if line.find('\\begin_inset') > -1:
 445             insets.append(line[13:].split()[0])
 446         if line.find('\\end_inset') > -1:
 447             del insets[-1]
 448
 449         # Try to write the line
 450         try:
 451             # If all goes well the line is written here
 452             dummy = line.encode(document.encoding)
 453             i += 1
 454         except:
 455             # Error, some character(s) in the line need to be replaced
 456             mod_line = revert_unicode_line(document, i, insets, spec_chars)
 457             document.body[i:i+1] = mod_line.split('\n')
 458             i += len(mod_line.split('\n'))
 459
 460
 461 def revert_cs_label(document):
 462     " Remove status flag of charstyle label. "
 463     i = 0
 464     while 1:
 465         i = find_token(document.body, "\\begin_inset CharStyle", i)
 466         if i == -1:
 467             return
 468         # Seach for a line starting 'show_label'
 469         # If it is not there, break with a warning message
 470         i = i + 1
 471         while 1:
 472             if (document.body[i][:10] == "show_label"):
 473                 del document.body[i]
 474                 break
 475             elif (document.body[i][:13] == "\\begin_layout"):
 476                 document.warning("Malformed LyX document: Missing 'show_label'.")
 477                 break
 478             i = i + 1
 479
 480         i = i + 1
 481
 482
 483 def convert_bibitem(document):
 484     """ Convert
 485 \bibitem [option]{argument}
 486
 487 to
 488
 489 \begin_inset LatexCommand bibitem
 490 label "option"
 491 key "argument"
 492
 493 \end_inset
 494
 495 This must be called after convert_commandparams.
 496 """
 497     i = 0
 498     while 1:
 499         i = find_token(document.body, "\\bibitem", i)
 500         if i == -1:
 501             break
 502         j = document.body[i].find('[') + 1
 503         k = document.body[i].rfind(']')
 504         if j == 0: # No optional argument found
 505             option = None
 506         else:
 507             option = document.body[i][j:k]
 508         j = document.body[i].rfind('{') + 1
 509         k = document.body[i].rfind('}')
 510         argument = document.body[i][j:k]
 511         lines = ['\\begin_inset LatexCommand bibitem']
 512         if option != None:
 513             lines.append('label "%s"' % option.replace('"', '\\"'))
 514         lines.append('key "%s"' % argument.replace('"', '\\"'))
 515         lines.append('')
 516         lines.append('\\end_inset')
 517         document.body[i:i+1] = lines
 518         i = i + 1
 519
 520
 521 commandparams_info = {
 522     # command : [option1, option2, argument]
 523     "bibitem" : ["label", "", "key"],
 524     "bibtex" : ["options", "btprint", "bibfiles"],
 525     "cite"        : ["after", "before", "key"],
 526     "citet"       : ["after", "before", "key"],
 527     "citep"       : ["after", "before", "key"],
 528     "citealt"     : ["after", "before", "key"],
 529     "citealp"     : ["after", "before", "key"],
 530     "citeauthor"  : ["after", "before", "key"],
 531     "citeyear"    : ["after", "before", "key"],
 532     "citeyearpar" : ["after", "before", "key"],
 533     "citet*"      : ["after", "before", "key"],
 534     "citep*"      : ["after", "before", "key"],
 535     "citealt*"    : ["after", "before", "key"],
 536     "citealp*"    : ["after", "before", "key"],
 537     "citeauthor*" : ["after", "before", "key"],
 538     "Citet"       : ["after", "before", "key"],
 539     "Citep"       : ["after", "before", "key"],
 540     "Citealt"     : ["after", "before", "key"],
 541     "Citealp"     : ["after", "before", "key"],
 542     "Citeauthor"  : ["after", "before", "key"],
 543     "Citet*"      : ["after", "before", "key"],
 544     "Citep*"      : ["after", "before", "key"],
 545     "Citealt*"    : ["after", "before", "key"],
 546     "Citealp*"    : ["after", "before", "key"],
 547     "Citeauthor*" : ["after", "before", "key"],
 548     "citefield"   : ["after", "before", "key"],
 549     "citetitle"   : ["after", "before", "key"],
 550     "cite*"       : ["after", "before", "key"],
 551     "hfill" : ["", "", ""],
 552     "index"      : ["", "", "name"],
 553     "printindex" : ["", "", "name"],
 554     "label" : ["", "", "name"],
 555     "eqref"     : ["name", "", "reference"],
 556     "pageref"   : ["name", "", "reference"],
 557     "prettyref" : ["name", "", "reference"],
 558     "ref"       : ["name", "", "reference"],
 559     "vpageref"  : ["name", "", "reference"],
 560     "vref"      : ["name", "", "reference"],
 561     "tableofcontents" : ["", "", "type"],
 562     "htmlurl" : ["name", "", "target"],
 563     "url"     : ["name", "", "target"]}
 564
 565
 566 def convert_commandparams(document):
 567     """ Convert
 568
 569  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 570  \end_inset
 571
 572  to
 573
 574  \begin_inset LatexCommand cmdname
 575  name1 "opt1"
 576  name2 "opt2"
 577  name3 "arg"
 578  \end_inset
 579
 580  name1, name2 and name3 can be different for each command.
 581 """
 582     # \begin_inset LatexCommand bibitem was not the official version (see
 583     # convert_bibitem()), but could be read in, so we convert it here, too.
 584
 585     i = 0
 586     while 1:
 587         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 588         if i == -1:
 589             break
 590         command = document.body[i][26:].strip()
 591         if command == "":
 592             document.warning("Malformed LyX document: Missing LatexCommand name.")
 593             i = i + 1
 594             continue
 595
 596         j = find_token(document.body, "\\end_inset", i + 1)
 597         if j == -1:
 598             document.warning("Malformed document")
 599         else:
 600             command += "".join(document.body[i+1:j])
 601             document.body[i+1:j] = []
 602
 603         # The following parser is taken from the original InsetCommandParams::scanCommand
 604         name = ""
 605         option1 = ""
 606         option2 = ""
 607         argument = ""
 608         state = "WS"
 609         # Used to handle things like \command[foo[bar]]{foo{bar}}
 610         nestdepth = 0
 611         b = 0
 612         for c in command:
 613             if ((state == "CMDNAME" and c == ' ') or
 614                 (state == "CMDNAME" and c == '[') or
 615                 (state == "CMDNAME" and c == '{')):
 616                 state = "WS"
 617             if ((state == "OPTION" and c == ']') or
 618                 (state == "SECOPTION" and c == ']') or
 619                 (state == "CONTENT" and c == '}')):
 620                 if nestdepth == 0:
 621                     state = "WS"
 622                 else:
 623                     nestdepth = nestdepth - 1
 624             if ((state == "OPTION" and c == '[') or
 625                 (state == "SECOPTION" and c == '[') or
 626                 (state == "CONTENT" and c == '{')):
 627                 nestdepth = nestdepth + 1
 628             if state == "CMDNAME":
 629                     name += c
 630             elif state == "OPTION":
 631                     option1 += c
 632             elif state == "SECOPTION":
 633                     option2 += c
 634             elif state == "CONTENT":
 635                     argument += c
 636             elif state == "WS":
 637                 if c == '\\':
 638                     state = "CMDNAME"
 639                 elif c == '[' and b != ']':
 640                     state = "OPTION"
 641                     nestdepth = 0 # Just to be sure
 642                 elif c == '[' and b == ']':
 643                     state = "SECOPTION"
 644                     nestdepth = 0 # Just to be sure
 645                 elif c == '{':
 646                     state = "CONTENT"
 647                     nestdepth = 0 # Just to be sure
 648             b = c
 649
 650         # Now we have parsed the command, output the parameters
 651         lines = ["\\begin_inset LatexCommand %s" % name]
 652         if option1 != "":
 653             if commandparams_info[name][0] == "":
 654                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 655             else:
 656                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
 657         if option2 != "":
 658             if commandparams_info[name][1] == "":
 659                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 660             else:
 661                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
 662         if argument != "":
 663             if commandparams_info[name][2] == "":
 664                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 665             else:
 666                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
 667         document.body[i:i+1] = lines
 668         i = i + 1
 669
 670
 671 def revert_commandparams(document):
 672     regex = re.compile(r'(\S+)\s+(.+)')
 673     i = 0
 674     while 1:
 675         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 676         if i == -1:
 677             break
 678         name = document.body[i].split()[2]
 679         j = find_end_of_inset(document.body, i)
 680         preview_line = ""
 681         option1 = ""
 682         option2 = ""
 683         argument = ""
 684         for k in range(i + 1, j):
 685             match = re.match(regex, document.body[k])
 686             if match:
 687                 pname = match.group(1)
 688                 pvalue = match.group(2)
 689                 if pname == "preview":
 690                     preview_line = document.body[k]
 691                 elif (commandparams_info[name][0] != "" and
 692                       pname == commandparams_info[name][0]):
 693                     option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
 694                 elif (commandparams_info[name][1] != "" and
 695                       pname == commandparams_info[name][1]):
 696                     option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
 697                 elif (commandparams_info[name][2] != "" and
 698                       pname == commandparams_info[name][2]):
 699                     argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
 700             elif document.body[k].strip() != "":
 701                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 702         if name == "bibitem":
 703             if option1 == "":
 704                 lines = ["\\bibitem {%s}" % argument]
 705             else:
 706                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 707         else:
 708             if option1 == "":
 709                 if option2 == "":
 710                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 711                 else:
 712                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 713             else:
 714                 if option2 == "":
 715                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 716                 else:
 717                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 718         if name != "bibitem":
 719             if preview_line != "":
 720                 lines.append(preview_line)
 721             lines.append('')
 722             lines.append('\\end_inset')
 723         document.body[i:j+1] = lines
 724         i += len(lines) + 1
 725
 726
 727 def revert_nomenclature(document):
 728     " Convert nomenclature entry to ERT. "
 729     regex = re.compile(r'(\S+)\s+(.+)')
 730     i = 0
 731     use_nomencl = 0
 732     while 1:
 733         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 734         if i == -1:
 735             break
 736         use_nomencl = 1
 737         j = find_end_of_inset(document.body, i + 1)
 738         preview_line = ""
 739         symbol = ""
 740         description = ""
 741         prefix = ""
 742         for k in range(i + 1, j):
 743             match = re.match(regex, document.body[k])
 744             if match:
 745                 name = match.group(1)
 746                 value = match.group(2)
 747                 if name == "preview":
 748                     preview_line = document.body[k]
 749                 elif name == "symbol":
 750                     symbol = value.strip('"').replace('\\"', '"')
 751                 elif name == "description":
 752                     description = value.strip('"').replace('\\"', '"')
 753                 elif name == "prefix":
 754                     prefix = value.strip('"').replace('\\"', '"')
 755             elif document.body[k].strip() != "":
 756                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 757         if prefix == "":
 758             command = 'nomenclature{%s}{%s}' % (symbol, description)
 759         else:
 760             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 761         document.body[i:j+1] = ['\\begin_inset ERT',
 762                                 'status collapsed',
 763                                 '',
 764                                 '\\begin_layout %s' % document.default_layout,
 765                                 '',
 766                                 '',
 767                                 '\\backslash',
 768                                 command,
 769                                 '\\end_layout',
 770                                 '',
 771                                 '\\end_inset']
 772         i = i + 11
 773     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 774         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 775         document.preamble.append('\\makenomenclature')
 776
 777
 778 def revert_printnomenclature(document):
 779     " Convert printnomenclature to ERT. "
 780     regex = re.compile(r'(\S+)\s+(.+)')
 781     i = 0
 782     use_nomencl = 0
 783     while 1:
 784         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 785         if i == -1:
 786             break
 787         use_nomencl = 1
 788         j = find_end_of_inset(document.body, i + 1)
 789         preview_line = ""
 790         labelwidth = ""
 791         for k in range(i + 1, j):
 792             match = re.match(regex, document.body[k])
 793             if match:
 794                 name = match.group(1)
 795                 value = match.group(2)
 796                 if name == "preview":
 797                     preview_line = document.body[k]
 798                 elif name == "labelwidth":
 799                     labelwidth = value.strip('"').replace('\\"', '"')
 800             elif document.body[k].strip() != "":
 801                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 802         if labelwidth == "":
 803             command = 'nomenclature{}'
 804         else:
 805             command = 'nomenclature[%s]' % labelwidth
 806         document.body[i:j+1] = ['\\begin_inset ERT',
 807                                 'status collapsed',
 808                                 '',
 809                                 '\\begin_layout %s' % document.default_layout,
 810                                 '',
 811                                 '',
 812                                 '\\backslash',
 813                                 command,
 814                                 '\\end_layout',
 815                                 '',
 816                                 '\\end_inset']
 817         i = i + 11
 818     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 819         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 820         document.preamble.append('\\makenomenclature')
 821
 822
 823 def convert_esint(document):
 824     " Add \\use_esint setting to header. "
 825     i = find_token(document.header, "\\cite_engine", 0)
 826     if i == -1:
 827         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 828         return
 829     # 0 is off, 1 is auto, 2 is on.
 830     document.header.insert(i, '\\use_esint 0')
 831
 832
 833 def revert_esint(document):
 834     " Remove \\use_esint setting from header. "
 835     i = find_token(document.header, "\\use_esint", 0)
 836     if i == -1:
 837         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 838         return
 839     use_esint = document.header[i].split()[1]
 840     del document.header[i]
 841     # 0 is off, 1 is auto, 2 is on.
 842     if (use_esint == 2):
 843         document.preamble.append('\\usepackage{esint}')
 844
 845
 846 def revert_clearpage(document):
 847     " clearpage -> ERT "
 848     i = 0
 849     while 1:
 850         i = find_token(document.body, "\\clearpage", i)
 851         if i == -1:
 852             break
 853         document.body[i:i+1] =  ['\\begin_inset ERT',
 854                                 'status collapsed',
 855                                 '',
 856                                 '\\begin_layout %s' % document.default_layout,
 857                                 '',
 858                                 '',
 859                                 '\\backslash',
 860                                 'clearpage',
 861                                 '\\end_layout',
 862                                 '',
 863                                 '\\end_inset']
 864     i = i + 1
 865
 866
 867 def revert_cleardoublepage(document):
 868     " cleardoublepage -> ERT "
 869     i = 0
 870     while 1:
 871         i = find_token(document.body, "\\cleardoublepage", i)
 872         if i == -1:
 873             break
 874         document.body[i:i+1] =  ['\\begin_inset ERT',
 875                                 'status collapsed',
 876                                 '',
 877                                 '\\begin_layout %s' % document.default_layout,
 878                                 '',
 879                                 '',
 880                                 '\\backslash',
 881                                 'cleardoublepage',
 882                                 '\\end_layout',
 883                                 '',
 884                                 '\\end_inset']
 885     i = i + 1
 886
 887
 888 def convert_lyxline(document):
 889     " remove fontsize commands for \lyxline "
 890     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 891     # to change the line thickness. The new definiton does this so that imported
 892     # \lyxlines would have a different line thickness. The eventual fontsize command
 893     # before \lyxline is therefore removed to get the same output.
 894     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 895                  "large", "Large", "LARGE", "huge", "Huge"]
 896     for n in range(0, len(fontsizes)):
 897         i = 0
 898         k = 0
 899         while i < len(document.body):
 900             i = find_token(document.body, "\\size " + fontsizes[n], i)
 901             k = find_token(document.body, "\\lyxline", i)
 902             # the corresponding fontsize command is always 2 lines before the \lyxline
 903             if (i != -1 and k == i+2):
 904                 document.body[i:i+1] = []
 905             else:
 906                 break
 907         i = i + 1
 908
 909
 910 def revert_encodings(document):
 911     " Set new encodings to auto. "
 912     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 913                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 914                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 915     i = find_token(document.header, "\\inputencoding", 0)
 916     if i == -1:
 917         document.header.append("\\inputencoding auto")
 918     else:
 919         inputenc = get_value(document.header, "\\inputencoding", i)
 920         if inputenc in encodings:
 921             document.header[i] = "\\inputencoding auto"
 922     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 923
 924
 925 def convert_caption(document):
 926     " Convert caption layouts to caption insets. "
 927     i = 0
 928     while 1:
 929         i = find_token(document.body, "\\begin_layout Caption", i)
 930         if i == -1:
 931             return
 932         j = find_end_of_layout(document.body, i)
 933         if j == -1:
 934             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 935             return
 936
 937         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 938         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 939                             "\\begin_inset Caption", "",
 940                             "\\begin_layout %s" % document.default_layout]
 941         i = i + 1
 942
 943
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    # Every caption inset is turned into a paragraph with layout Caption.
    # The function returns after the last inset, or early (with a warning)
    # as soon as an expected layout or inset delimiter cannot be found.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the paragraph start: it is needed again below if the
        # paragraph continues after the inset.
        layout_line = document.body[layout_before]
        # The inset is in first position if only empty lines are between
        # the \begin_layout line and the inset.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            # the inset has moved up to the position of the deleted line
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            # the inset has moved down by the two inserted lines
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is at the paragraph end if only empty lines are between
        # the \end_inset line and the following \end_layout line.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions are done from the end (k) towards the start (j, i),
        # so the smaller indices are still valid when they are used.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1016
1017
# Accents of InsetLaTeXAccent: maps the LaTeX accent command (without the
# backslash) to the corresponding unicode combining character.
accent_map = dict([
    ("`",  u'\u0300'),  # grave
    ("'",  u'\u0301'),  # acute
    ("^",  u'\u0302'),  # circumflex
    ("~",  u'\u0303'),  # tilde
    ("=",  u'\u0304'),  # macron
    ("u",  u'\u0306'),  # breve
    (".",  u'\u0307'),  # dot above
    ("\"", u'\u0308'),  # diaeresis
    ("r",  u'\u030a'),  # ring above
    ("H",  u'\u030b'),  # double acute
    ("v",  u'\u030c'),  # caron
    ("b",  u'\u0320'),  # minus sign below
    ("d",  u'\u0323'),  # dot below
    ("c",  u'\u0327'),  # cedilla
    ("k",  u'\u0328'),  # ogonek
    # tie. This is special: It spans two characters, but only one is
    # given as argument, so we don't need to treat it differently.
    ("t",  u'\u0361'),
])
1039
1040
# special accents of InsetLaTeXAccent without argument: maps the LaTeX
# command (without the backslash) to the unicode character it stands for.
special_accent_map = dict([
    ('i', u'\u0131'),  # dotless i
    ('j', u'\u0237'),  # dotless j
    ('l', u'\u0142'),  # l with stroke
    ('L', u'\u0141'),  # L with stroke
])
1048
1049
# special accent arguments of InsetLaTeXAccent: LaTeX commands that may be
# given as the accented character, mapped to their unicode character.
accented_map = dict([
    ('\\i', u'\u0131'),  # dotless i
    ('\\j', u'\u0237'),  # dotless j
])
1055
1056
def _convert_accent(accent, accented_char):
    """ Return the unicode character for the given LaTeX accent command
    (without backslash) applied to accented_char, or '' if the
    combination cannot be converted.

    An empty accented_char selects one of the special characters of
    special_accent_map if the accent names one; otherwise the accent is
    put on a space."""
    kind, base = accent, accented_char
    if base == '':
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = ' '
    elif kind == 'q' and base in ('t', 'd', 'l', 'L'):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = 'v'
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # We can only convert accents on a single char
        return ''
    combining = accent_map.get(kind)
    if not combining:
        return ''
    # compose the base character and the combining accent if possible
    return unicodedata.normalize("NFC", base + combining)
1082
1083
def convert_ertbackslash(body, i, ert, default_layout):
    r""" Convert backslashes and '\n' into valid ERT code, append the
    converted text to body[i] and return the (maybe incremented) line
    index i.

    A backslash becomes '\backslash ' and starts a new line of body; a
    newline ends the current paragraph and begins a new one with layout
    default_layout. All other characters are appended unchanged."""

    paragraph_break = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
    line = i
    for char in ert:
        if char == '\n':
            # the new text line is the last (empty) line of the break
            body[line+1:line+1] = paragraph_break
            line = line + len(paragraph_break)
            continue
        if char != '\\':
            body[line] = body[line] + char
            continue
        body[line] = body[line] + '\\backslash '
        line = line + 1
        body.insert(line, '')
    return line
1100
1101
def convert_accent(document):
    """ Convert InsetLaTeXAccent lines of document.body to unicode
    characters. Accents that cannot be converted are put into an ERT
    inset instead (with a warning)."""
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a'  (also accepted)
    # '\i \"'    (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents. This must be a plain string: a
                # trailing comma here would create a tuple.
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument is not a simple (optionally braced) text,
                # e.g. it has nested braces. Pass it unchanged to the ERT
                # inset; the leading backslash is added again below.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # i + 7 is the last (empty) line inserted above, it receives the
        # ERT contents
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
1151
1152
def is_inset_line(document, i):
    """ Line i of body has an inset.

    This is assumed if the line starts with a backslash, or if one of
    its last two whitespace separated words contains a backslash."""
    line = document.body[i]
    if line.startswith('\\'):
        return True
    trailing_words = line.split()[-2:]
    return '\\' in "".join(trailing_words)
1159
1160
1161 # A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """ Return text normalized to the unicode normalization form `form',
    but leave the characters OHM SIGN and ANGSTROM SIGN untouched.

    The text between these characters is normalized piece by piece with
    unicodedata.normalize()."""
    # do not normalize OHM, ANGSTROM
    protected = (0x2126, 0x212b)
    pieces = []
    pending = []
    for char in text:
        if ord(char) not in protected:
            pending.append(char)
            continue
        # a protected character ends the current piece
        if pending:
            pieces.append(unicodedata.normalize(form, ''.join(pending)))
            pending = []
        pieces.append(char)
    if pending:
        pieces.append(unicodedata.normalize(form, ''.join(pending)))
    return ''.join(pieces)
1178
1179
def revert_accent(document):
    """ Replace accented characters of document.body that cannot be
    represented in the current encoding by InsetLaTeXAccent lines
    ('\\i \\<accent>{<char>}').

    The body is decomposed (NFD) first so that accents show up as
    separate combining characters, then the unrepresentable ones are
    replaced, and finally the body is composed (NFC) again."""
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    # The encoding is tracked line by line only in this case; otherwise
    # the initial encoding is used for the whole body.
    track_encoding = ((document.inputencoding == "auto" or
                       document.inputencoding == "default") and
                      document.cjk_encoding != '')

    i = 0
    while i < len(document.body):
        if track_encoding:
            # Track the encoding of the current line.
            # Every branch has to advance i before continuing with the
            # next line, otherwise the same line would be processed
            # forever.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i = i + 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i = i + 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i = i + 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The rest of the line is handled as the next line
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    # The rest of the line is handled as the next line
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
1284
1285
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside.

    The font properties handled here are series, emph, color, shape, bar
    and family; each is given with the value that resets it."""

    reset_values = dict([("\\series", "default"),
                         ("\\emph", "default"),
                         ("\\color", "none"),
                         ("\\shape", "default"),
                         ("\\bar", "default"),
                         ("\\family", "default")])
    return normalize_font_whitespace(document, reset_values)
1298
def normalize_font_whitespace_274(document):
    """ Apply the whitespace conversion of format 259 to language changes.

    Before format 259 (sic) the font changes were ignored if a whitespace
    was the first or last character in the sequence. This was corrected
    for most font properties in format 259, but the language was
    forgotten then. This function does the same conversion (namely,
    transfers the whitespace outside) for font language changes."""

    reset_values = dict([("\\lang", "default")])
    return normalize_font_whitespace(document, reset_values)
1309
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    # the first nonempty line after the \begin_layout line
    first_words = body[find_nonempty_line(body, layout_start + 1)].split()
    if len(first_words) > 1 and first_words[0] == "\\lang":
        return first_words[1]
    return document.language
1327
1328 def normalize_font_whitespace(document, char_properties):
1329     """ Before format 259 the font changes were ignored if a
1330     whitespace was the first or last character in the sequence, this function
1331     transfers the whitespace outside. Only a change in one of the properties
1332     in the provided     char_properties is handled by this function."""
1333
1334     if document.backend != "latex":
1335         return
1336
1337     lines = document.body
1338
1339     changes = {}
1340
1341     i = 0
1342     while i < len(lines):
1343         words = lines[i].split()
1344
1345         if len(words) > 0 and words[0] == "\\begin_layout":
1346             # a new paragraph resets all font changes
1347             changes.clear()
1348             # also reset the default language to be the paragraph's language
1349             if "\\lang" in char_properties.keys():
1350                 char_properties["\\lang"] = \
1351                     get_paragraph_language(document, i + 1)
1352
1353         elif len(words) > 1 and words[0] in char_properties.keys():
1354             # we have a font change
1355             if char_properties[words[0]] == words[1]:
1356                 # property gets reset
1357                 if words[0] in changes.keys():
1358                     del changes[words[0]]
1359                 defaultproperty = True
1360             else:
1361                 # property gets set
1362                 changes[words[0]] = words[1]
1363                 defaultproperty = False
1364
1365             # We need to explicitly reset all changed properties if we find
1366             # a space below, because LyX 1.4 would output the space after
1367             # closing the previous change and before starting the new one,
1368             # and closing a font change means to close all properties, not
1369             # just the changed one.
1370
1371             if lines[i-1] and lines[i-1][-1] == " ":
1372                 lines[i-1] = lines[i-1][:-1]
1373                 # a space before the font change
1374                 added_lines = [" "]
1375                 for k in changes.keys():
1376                     # exclude property k because that is already in lines[i]
1377                     if k != words[0]:
1378                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1379                 for k in changes.keys():
1380                     # exclude property k because that must be added below anyway
1381                     if k != words[0]:
1382                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1383                 if defaultproperty:
1384                     # Property is reset in lines[i], so add the new stuff afterwards
1385                     lines[i+1:i+1] = added_lines
1386                 else:
1387                     # Reset property for the space
1388                     added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1389                     lines[i:i] = added_lines
1390                 i = i + len(added_lines)
1391
1392             elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1393                 # a space after the font change
1394                 if (lines[i+1] == " " and lines[i+2]):
1395                     next_words = lines[i+2].split()
1396                     if len(next_words) > 0 and next_words[0] == words[0]:
1397                         # a single blank with a property different from the
1398                         # previous and the next line must not be changed
1399                         i = i + 2
1400                         continue
1401                 lines[i+1] = lines[i+1][1:]
1402                 added_lines = [" "]
1403                 for k in changes.keys():
1404                     # exclude property k because that is already in lines[i]
1405                     if k != words[0]:
1406                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1407                 for k in changes.keys():
1408                     # exclude property k because that must be added below anyway
1409                     if k != words[0]:
1410                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1411                 # Reset property for the space
1412                 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1413                 lines[i:i] = added_lines
1414                 i = i + len(added_lines)
1415
1416         i = i + 1
1417
1418
def revert_utf8x(document):
    """Replace the ``utf8x`` input encoding in the header with ``utf8``.

    A header without an ``\\inputencoding`` line gets
    ``\\inputencoding auto`` appended.  Afterwards
    ``document.inputencoding`` is re-read from the header.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8x":
            header[pos] = "\\inputencoding utf8"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1429
1430
def revert_utf8plain(document):
    """Replace the ``utf8-plain`` input encoding in the header with ``utf8``.

    A header without an ``\\inputencoding`` line gets
    ``\\inputencoding auto`` appended.  Afterwards
    ``document.inputencoding`` is re-read from the header.
    """
    token = "\\inputencoding"
    where = find_token(document.header, token, 0)
    if where == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, token, where) == "utf8-plain":
        document.header[where] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, token, 0)
1441
1442
def revert_beamer_alert(document):
    """Revert beamer's \\alert inset back to ERT.

    Every ``\\begin_inset CharStyle Alert`` line is rewritten to
    ``\\begin_inset ERT`` and the line that follows the next
    ``\\begin_layout`` line is wrapped in ``\\alert{...}``.

    If no ``\\begin_layout`` line (or no line after it) exists, a warning is
    issued and the conversion stops instead of indexing past the end of the
    body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        # Bounded search for the layout that holds the inset's text.
        i = find_token(body, "\\begin_layout", i + 1)
        if i == -1 or i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find layout of alert inset.")
            return
        # Insert the \alert command
        body[i + 1] = "\\alert{" + body[i + 1] + '}'
        # Resume the search behind the layout line.
        i = i + 1
1460
1461
def revert_beamer_structure(document):
    """Revert beamer's \\structure inset back to ERT.

    Every ``\\begin_inset CharStyle Structure`` line is rewritten to
    ``\\begin_inset ERT`` and the line that follows the next
    ``\\begin_layout`` line is wrapped in ``\\structure{...}``.

    If no ``\\begin_layout`` line (or no line after it) exists, a warning is
    issued and the conversion stops instead of indexing past the end of the
    body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        # Bounded search for the layout that holds the inset's text.
        i = find_token(body, "\\begin_layout", i + 1)
        if i == -1 or i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find layout of structure inset.")
            return
        # Insert the \structure command
        body[i + 1] = "\\structure{" + body[i + 1] + '}'
        # Resume the search behind the layout line.
        i = i + 1
1478
1479
def convert_changes(document):
    """Switch output_changes off if tracking_changes is off.

    Warns and leaves the header untouched when either the
    ``\\tracking_changes`` or the ``\\output_changes`` line is missing.
    """
    header = document.header

    track_pos = find_token(header, '\\tracking_changes', 0)
    if track_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return

    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return

    tracking_off = get_value(header, "\\tracking_changes", track_pos) == "false"
    output_on = get_value(header, "\\output_changes", output_pos) == "true"
    if tracking_off and output_on:
        header[output_pos] = "\\output_changes false"
1494
1495
def revert_ascii(document):
    """Replace the ``ascii`` input encoding in the header with ``auto``.

    A header without an ``\\inputencoding`` line gets
    ``\\inputencoding auto`` appended.  Afterwards
    ``document.inputencoding`` is re-read from the header.
    """
    header = document.header
    line_no = find_token(header, "\\inputencoding", 0)
    if line_no == -1:
        header.append("\\inputencoding auto")
    else:
        is_ascii = get_value(header, "\\inputencoding", line_no) == "ascii"
        if is_ascii:
            header[line_no] = "\\inputencoding auto"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1506
1507
def normalize_language_name(document):
    """Rename the document languages ``brazil`` and ``portuges``.

    ``brazil`` becomes ``brazilian`` and ``portuges`` becomes ``portuguese``,
    both in ``document.language`` and in the header's ``\\language`` line.
    Any other language is left untouched.
    """
    lang = {"brazil": "brazilian",
            "portuges": "portuguese"}

    if document.language not in lang:
        return

    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    # A missing \language line yields -1; without this guard the assignment
    # would silently overwrite the last header line.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
1516
1517
def revert_language_name(document):
    """Restore the old document language names ``brazil`` and ``portuges``.

    ``brazilian`` becomes ``brazil`` and ``portuguese`` becomes ``portuges``,
    both in ``document.language`` and in the header's ``\\language`` line.
    Any other language is left untouched.
    """
    lang = {"brazilian": "brazil",
            "portuguese": "portuges"}

    if document.language not in lang:
        return

    document.language = lang[document.language]
    i = find_token(document.header, "\\language", 0)
    # A missing \language line yields -1; without this guard the assignment
    # would silently overwrite the last header line.
    if i != -1:
        document.header[i] = "\\language %s" % document.language
1526
1527 #
1528 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """Rename the text class ``cv`` to ``simplecv``; leave others alone."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1532
1533
def revert_cv_textclass(document):
    """Rename the text class ``simplecv`` back to ``cv``; leave others alone."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1537
1538
1539 #
1540 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation parameter to graphics insets.

    The parameter is inserted just before the inset's ``\\end_inset`` line
    when the inset has a ``rotateAngle`` parameter together with a ``width``,
    ``height`` or ``scale`` parameter.  Other graphics insets are unchanged.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without a known end there is no range to inspect.
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(body, "\trotateAngle", i + 1, j)
        l = find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if k != -1 and l != -1:
            # Tab-prefixed like the other graphics parameters, so that
            # revert_graphics_rotation (which looks for
            # "\tscaleBeforeRotation") can find the line again.
            body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
1559
1560
1561 #
1562 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation parameter from graphics insets.

    For each graphics inset:

    * if it has a ``scaleBeforeRotation`` line, that line is deleted;
    * otherwise, if it has ``rotateAngle`` together with ``width``,
      ``height`` or ``scale``, the angle is moved into the ``special``
      parameter (``angle=<value>``, prepended to an existing value) and the
      ``rotateAngle`` line is deleted.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without a known end there is no range to inspect.
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del body[k]
            i = i + 1
            continue
        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(body, 'rotateAngle', i + 1, j)
        special = get_value(body, 'special', i + 1, j)
        if rotateAngle != "":
            k = find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j)
            if k == -1:
                # Nothing to do for this inset; move on to the next one
                # rather than abandoning all remaining graphics insets.
                i = i + 1
                continue
            if special == "":
                body.insert(j - 1, '\tspecial angle=%s' % rotateAngle)
                # The insertion moved the end of the inset down by one line.
                j = j + 1
            else:
                l = find_token(body, "\tspecial", i + 1, j)
                body[l] = body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
            k = find_token(body, "\trotateAngle", i + 1, j)
            if k != -1:
                del body[k]
        i = i + 1
1596
1597
1598
def convert_tableborder(document):
    """Drop the "|" in front of ">{" on lines that set leftline="true".

    LyX doubles the table cell border as it ignores the "|" character in the
    cell arguments.  A fix takes care of this and therefore the "|" has to be
    removed.  Only the first "|>{" of a line is touched, and only when the
    same line also contains leftline="true".
    """
    rows = document.body
    for idx, row in enumerate(rows):
        if 'leftline="true"' not in row:
            continue
        bar = row.find("|>{")
        if bar != -1:
            # delete the "|"
            rows[idx] = row[:bar] + row[bar + 1:]
1611
1612
def revert_tableborder(document):
    """Put a "|" in front of the first ">{" on lines with leftline="true".

    Lines that lack either leftline="true" or ">{" are left unchanged.
    """
    rows = document.body
    for n in range(len(rows)):
        text = rows[n]
        brace = text.find(">{")
        if brace == -1 or 'leftline="true"' not in text:
            continue
        # add the "|"
        rows[n] = text[:brace] + '|' + text[brace:]
1623
1624
def revert_armenian(document):
    """Revert armenian support: encoding armscii8 and language armenian.

    * An ``armscii8`` document input encoding has its header line reset to
      ``\\inputencoding auto``.
    * An ``armenian`` document gets ``\\usepackage{armtex}`` in the preamble
      (as first line if the preamble already holds a line with a backslash or
      a percent sign, appended otherwise) and its language is switched to
      ``english``.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"

    if document.language != "armenian":
        return

    # A preamble counts as existing once any of its lines contains
    # a "\" or a "%".
    preamble = document.preamble
    preamble_exists = any("\\" in text or "%" in text for text in preamble)
    if preamble_exists:
        # set the armtex entry as the first preamble line
        preamble[0:0] = ["\\usepackage{armtex}"]
    else:
        # create the preamble when it doesn't exist
        preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
1655
1656
def revert_CJK(document):
    """Set CJK encodings to default and CJK languages to english.

    The header's ``\\inputencoding`` is replaced by ``default`` when it names
    one of the CJK encodings (``auto`` is appended when the line is missing),
    and ``document.inputencoding`` is re-read from the header.  The document
    languages chinese-simplified, chinese-traditional, japanese and korean
    are replaced by english.
    """
    cjk_encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")
    header = document.header

    enc_pos = find_token(header, "\\inputencoding", 0)
    if enc_pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", enc_pos) in cjk_encodings:
        header[enc_pos] = "\\inputencoding default"
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language in cjk_languages:
        document.language = "english"
        lang_pos = find_token(header, "\\language", 0)
        if lang_pos != -1:
            header[lang_pos] = "\\language english"
1677
1678
def revert_preamble_listings_params(document):
    """Revert the header option \\listings_params to preamble code.

    The ``\\listings_params`` header line is removed.  In its place the
    preamble gets ``\\usepackage{listings}`` (unless already present) and,
    if the option carried a value, ``\\lstset{<value>}``.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Take everything behind the token: the quoted value may contain spaces
    # (e.g. "language=C, numbers=left"), which a plain split() would cut off.
    fields = document.header[i].split(None, 1)
    params = ''
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    if '\\usepackage{listings}' not in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    if params:
        document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1686
1687
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are appended to the listing parameters
as caption={...} and label={...}.  Only a caption (and a label) located
inside the listings inset itself is taken into account.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # first three lines
        # NOTE(review): k (start of the listing content) is only assigned
        # when a status line or a caption is found.
        for line in range(i + 1, i + 4):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
            if document.body[line].startswith('lstparams'):
                # Keep the whole quoted value; it may contain spaces.
                params = document.body[line].split(None, 1)[1].strip().strip('"')
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # Restrict the search to this inset: a caption that belongs to a
        # later float must not be picked up (it would also move k behind j
        # and drop the listing content).
        cap = find_token(document.body, '\\begin_inset Caption', i, j)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label? (only inside the caption inset)
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # The caption text is every non-command line around the label.
            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            k = cap_end + 1
        inlinecode = ''
        # looking for the oneline code for lstinline
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout',
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1837
1838
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

An inset whose command cannot be parsed is reported with a warning and left
unchanged.
    '''
    # \lstinputlisting{<file>}<rest>; the file name may hold any character
    # except "}" (path separators, dashes, ...).
    command_re = re.compile(r'\\(lstinputlisting){([^}]*)}(.*)')

    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]: everything
        # behind "\begin_inset Include", so options with spaces survive
        command = document.body[i].split(None, 2)[2].rstrip()
        match = command_re.match(command)
        if match is None:
            document.warning("Malformed LyX document: Could not parse lstinputlisting inset.")
            # Skip this inset; continuing at i would find it again forever.
            i = j + 1
            continue
        cmd, filename, option = match.groups()
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout %s' % document.default_layout,
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1889
1890
def revert_ext_font_sizes(document):
    """Move a 10/11/12 paper font size of an ext* class into \\options.

    Applies only to the latex backend and to text classes whose name starts
    with "ext".  ``\\paperfontsize`` is reset to ``default`` and the size is
    passed as ``<size>pt`` class option instead: appended to an existing
    ``\\options`` line, or as a new ``\\options`` line right after
    ``\\textclass``.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    header = document.header
    size = get_value(header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    size_option = size + 'pt'

    size_pos = find_token(header, '\\paperfontsize', 0)
    header[size_pos] = '\\paperfontsize default'

    opt_pos = find_token(header, '\\options', 0)
    if opt_pos != -1:
        header[opt_pos] += ',%s' % size_option
    else:
        insert_at = find_token(header, '\\textclass', 0) + 1
        header[insert_at:insert_at] = ['\\options %s' % size_option]
1908
1909
def convert_ext_font_sizes(document):
    """Move a 10pt/11pt/12pt class option of an ext* class to \\paperfontsize.

    Applies only to the latex backend, to text classes whose name starts with
    "ext", and when ``\\paperfontsize`` is ``default``.  The first option
    that is exactly ``10pt``, ``11pt`` or ``12pt`` is removed from
    ``\\options`` (the line is deleted when no option remains) and its number
    becomes the new ``\\paperfontsize``.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    header = document.header
    if get_value(header, '\\paperfontsize', 0) != 'default':
        return

    opt_pos = find_token(header, '\\options', 0)
    if opt_pos == -1:
        return

    option_text = get_value(header, '\\options', opt_pos)

    sizes = ('10pt', '11pt', '12pt')
    # quick rejection: none of the sizes occurs anywhere in the option text
    if not any(size in option_text for size in sizes):
        return

    option_list = option_text.split(',')
    hits = [n for n, opt in enumerate(option_list) if opt in sizes]
    if not hits:
        return
    first = hits[0]
    # strip the trailing "pt"
    new_size = option_list[first][:-2]
    del option_list[first]

    size_pos = find_token(header, '\\paperfontsize', 0)
    header[size_pos] = '\\paperfontsize %s' % new_size

    if option_list:
        header[opt_pos] = '\\options %s' % ','.join(option_list)
    else:
        del header[opt_pos]
1945
1946
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

(with the document's default layout in place of Standard)
    '''

    body = document.body
    start = 0
    while True:
        start = find_token(body, r'\begin_layout --Separator--', start)
        if start == -1:
            break
        stop = find_end_of_layout(body, start + 1)
        if stop == -1:
            # this should not happen
            break
        note = [r'\begin_layout %s' % document.default_layout,
                r'\begin_inset Note Note',
                'status open',
                '',
                r'\begin_layout %s' % document.default_layout,
                'Separate Environment',
                r'\end_layout',
                '',
                r'\end_inset']
        # original paragraph content, without its begin/end layout lines
        content = body[start + 1:stop]
        body[start:stop + 1] = note + content + ['', r'\end_layout']
1994
1995
def convert_arabic(document):
    """Rename the language ``arabic`` to ``arabic_arabtex``.

    The document language (``document.language`` and the header's
    ``\\language`` line) is renamed if it is ``arabic``.  In addition every
    body line containing ``\\lang arabic`` is replaced by
    ``\\lang arabic_arabtex``.
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    for i, line in enumerate(document.body):
        # "\\lang": the backslash is escaped explicitly, "\l" is not a valid
        # escape sequence.
        if "\\lang arabic" in line:
            # change the language name
            document.body[i] = "\\lang arabic_arabtex"
2009
2010
def revert_arabic(document):
    """Rename the language ``arabic_arabtex`` back to ``arabic``.

    The document language (``document.language`` and the header's
    ``\\language`` line) is renamed if it is ``arabic_arabtex``.  In addition
    every body line containing ``\\lang arabic_arabtex`` is replaced by
    ``\\lang arabic``.
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    for i, line in enumerate(document.body):
        # "\\lang": the backslash is escaped explicitly, "\l" is not a valid
        # escape sequence.
        if "\\lang arabic_arabtex" in line:
            # change the language name
            document.body[i] = "\\lang arabic"
2024
2025
##
# Conversion hub
#
# The tables below pair file format numbers with the functions of this
# module.  NOTE(review): presumably they are read by the lyx2lyx driver
# (see the LyX module imported at the top of the file) -- confirm there how
# the format numbers are interpreted.

# LyX release names associated with this module.
supported_versions = ["1.5.0","1.5"]

# Forward conversions: [format number, [functions]] entries in ascending
# format order (246 .. 276).  An empty list means no function is registered
# for that entry.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Backward conversions: [format number, [functions]] entries in descending
# format order (275 .. 245).  The listings reverts are registered for both
# 271 and 268.
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


# Running this module as a script does nothing.
if __name__ == "__main__":
    pass