lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29 # Provide support for both python 2 and 3
  30 PY2 = sys.version_info[0] == 2
  31 if not PY2:
  32     text_type = str
  33     unichr = chr
  34 else:
  35     text_type = unicode
  36     unichr = unichr
  37 # End of code to support for both python 2 and 3
  38
  39 ####################################################################
  40 # Private helper functions
  41
  42 def find_end_of_inset(lines, i):
  43     " Find end of inset, where lines[i] is included."
  44     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  45
  46 def find_end_of_layout(lines, i):
  47     " Find end of layout, where lines[i] is included."
  48     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  49
  50 def find_beginning_of_layout(lines, i):
  51     "Find beginning of layout, where lines[i] is included."
  52     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  53
  54 # End of helper functions
  55 ####################################################################
  56
  57
  58 ##
  59 #  Notes: Framed/Shaded
  60 #
  61
  62 def revert_framed(document):
  63     "Revert framed notes. "
  64     i = 0
  65     while 1:
  66         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  67
  68         if i == -1:
  69             return
  70         document.body[i] = "\\begin_inset Note"
  71         i = i + 1
  72
  73
  74 ##
  75 #  Fonts
  76 #
  77
  78 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  79                     'times'   : 'times',   'palatino' : 'palatino',
  80                     'helvet'  : 'default', 'avant'    : 'default',
  81                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  82                     'pslatex' : 'times'}
  83 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  84                     'times'   : 'default', 'palatino' : 'default',
  85                     'helvet'  : 'helvet',  'avant'    : 'avant',
  86                     'newcent' : 'default', 'bookman'  : 'default',
  87                     'pslatex' : 'helvet'}
  88 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  89                     'times'   : 'default', 'palatino' : 'default',
  90                     'helvet'  : 'default', 'avant'    : 'default',
  91                     'newcent' : 'default', 'bookman'  : 'default',
  92                     'pslatex' : 'courier'}
  93
  94 def convert_font_settings(document):
  95     " Convert font settings. "
  96     i = 0
  97     i = find_token_exact(document.header, "\\fontscheme", i)
  98     if i == -1:
  99         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
 100         return
 101     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
 102     if font_scheme == '':
 103         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
 104         font_scheme = 'default'
 105     if not font_scheme in list(roman_fonts.keys()):
 106         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
 107         font_scheme = 'default'
 108     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 109                           '\\font_sans %s' % sans_fonts[font_scheme],
 110                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 111                           '\\font_default_family default',
 112                           '\\font_sc false',
 113                           '\\font_osf false',
 114                           '\\font_sf_scale 100',
 115                           '\\font_tt_scale 100']
 116
 117
 118 def revert_font_settings(document):
 119     " Revert font settings. "
 120     i = 0
 121     insert_line = -1
 122     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 123     for family in 'roman', 'sans', 'typewriter':
 124         name = '\\font_%s' % family
 125         i = find_token_exact(document.header, name, i)
 126         if i == -1:
 127             document.warning("Malformed LyX document: Missing `%s'." % name)
 128             i = 0
 129         else:
 130             if (insert_line < 0):
 131                 insert_line = i
 132             fonts[family] = get_value(document.header, name, i, i + 1)
 133             del document.header[i]
 134     i = find_token_exact(document.header, '\\font_default_family', i)
 135     if i == -1:
 136         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 137         font_default_family = 'default'
 138     else:
 139         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 140         del document.header[i]
 141     i = find_token_exact(document.header, '\\font_sc', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 144         font_sc = 'false'
 145     else:
 146         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 147         del document.header[i]
 148     if font_sc != 'false':
 149         document.warning("Conversion of '\\font_sc' not yet implemented.")
 150     i = find_token_exact(document.header, '\\font_osf', i)
 151     if i == -1:
 152         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 153         font_osf = 'false'
 154     else:
 155         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 156         del document.header[i]
 157     i = find_token_exact(document.header, '\\font_sf_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 160         font_sf_scale = '100'
 161     else:
 162         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 163         del document.header[i]
 164     if font_sf_scale != '100':
 165         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 166     i = find_token_exact(document.header, '\\font_tt_scale', i)
 167     if i == -1:
 168         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 169         font_tt_scale = '100'
 170     else:
 171         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 172         del document.header[i]
 173     if font_tt_scale != '100':
 174         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 175     for font_scheme in list(roman_fonts.keys()):
 176         if (roman_fonts[font_scheme] == fonts['roman'] and
 177             sans_fonts[font_scheme] == fonts['sans'] and
 178             typewriter_fonts[font_scheme] == fonts['typewriter']):
 179             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 180             if font_default_family != 'default':
 181                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 182             if font_osf == 'true':
 183                 document.warning("Ignoring `\\font_osf = true'")
 184             return
 185     font_scheme = 'default'
 186     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 187     if fonts['roman'] == 'cmr':
 188         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 189         if font_osf == 'true':
 190             document.preamble.append('\\usepackage{eco}')
 191             font_osf = 'false'
 192     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 193         if fonts['roman'] == font:
 194             document.preamble.append('\\usepackage{%s}' % font)
 195     for font in 'cmss', 'lmss', 'cmbr':
 196         if fonts['sans'] == font:
 197             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 198     for font in 'berasans':
 199         if fonts['sans'] == font:
 200             document.preamble.append('\\usepackage{%s}' % font)
 201     for font in 'cmtt', 'lmtt', 'cmtl':
 202         if fonts['typewriter'] == font:
 203             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 204     for font in 'courier', 'beramono', 'luximono':
 205         if fonts['typewriter'] == font:
 206             document.preamble.append('\\usepackage{%s}' % font)
 207     if font_default_family != 'default':
 208         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 209     if font_osf == 'true':
 210         document.warning("Ignoring `\\font_osf = true'")
 211
 212
 213 def revert_booktabs(document):
 214     " We remove the booktabs flag or everything else will become a mess. "
 215     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 216     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 217     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 218     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 219     i = 0
 220     while 1:
 221         i = find_token(document.body, "\\begin_inset Tabular", i)
 222         if i == -1:
 223             return
 224         j = find_end_of_inset(document.body, i + 1)
 225         if j == -1:
 226             document.warning("Malformed LyX document: Could not find end of tabular.")
 227             continue
 228         for k in range(i, j):
 229             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 230                 document.warning("Converting 'booktabs' table to normal table.")
 231                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 232             if re.search(re_row, document.body[k]):
 233                 document.warning("Removing extra row space.")
 234                 document.body[k] = re_tspace.sub('', document.body[k])
 235                 document.body[k] = re_bspace.sub('', document.body[k])
 236                 document.body[k] = re_ispace.sub('', document.body[k])
 237         i = i + 1
 238
 239
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.

    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the
    locale encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.

    Arguments:
    document -- the document to convert; its body lines are rewritten in
                place.  Nothing is done if document.cjk_encoding is not
                empty or if document.inputencoding is neither "auto" nor
                "default".
    forward  -- direction of the conversion, see above.
    """
    # Layouts that start inside one of these insets begin in the encoding of
    # the document language instead of inheriting the surrounding encoding.
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != '':
        return
    # Encoding in effect at the current line; one entry per open layout on
    # top of the document encoding, which always stays at the bottom.
    encoding_stack = [document.encoding]
    # Types of the insets that are open at the current line (innermost last).
    insets = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward: # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()

    if document.inputencoding == "auto" or document.inputencoding == "default":
        i = 0
        while i < len(document.body):
            result = lang_re.match(document.body[i])
            if result:
                # A language switch changes the encoding of the current level.
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    # Element 3 of the language table entry is used as the
                    # encoding name of that language.
                    # NOTE(review): a language missing from the table raises
                    # KeyError here.
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                # A new layout opens a new level on the encoding stack.
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                else:
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                # Remember the inset type; an inset without a type is
                # recorded as the empty string so the stack stays balanced.
                inset_result = inset_re.match(document.body[i])
                if inset_result:
                    insets.append(inset_result.group(1))
                else:
                    insets.append("")
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                # NOTE(review): raises IndexError for an \end_inset without
                # a matching \begin_inset.
                del insets[-1]
            # Only lines whose encoding differs from the document encoding
            # need to be re-encoded.
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    try:
                        # Convert unicode to the 8bit string that will be written
                        # to the file with the correct encoding.
                        orig = document.body[i].encode(encoding_stack[-1])
                        # Convert the 8bit string that will be written to the
                        # file to fake unicode with the encoding that will later
                        # be used when writing to the file.
                        document.body[i] = orig.decode(document.encoding)
                    except:
                        # The line cannot be re-encoded as a whole: replace
                        # the offending characters.  The replacement may
                        # span several lines, which are skipped below so
                        # that they are not processed again.
                        mod_line = revert_unicode_line(document, i, insets, spec_chars)
                        document.body[i:i+1] = mod_line.split('\n')
                        i += len(mod_line.split('\n')) - 1
            i += 1
 326
 327
 328 def convert_utf8(document):
 329     " Set document encoding to UTF-8. "
 330     convert_multiencoding(document, True)
 331     document.encoding = "utf8"
 332
 333
 334 def revert_utf8(document):
 335     " Set document encoding to the value corresponding to inputencoding. "
 336     i = find_token(document.header, "\\inputencoding", 0)
 337     if i == -1:
 338         document.header.append("\\inputencoding auto")
 339     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 340         document.header[i] = "\\inputencoding auto"
 341     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 342     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 343     convert_multiencoding(document, False)
 344
 345
 346 # FIXME: Use the version in unicode_symbols.py which has some bug fixes
 347 def read_unicodesymbols():
 348     " Read the unicodesymbols list of unicode characters and corresponding commands."
 349     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 350     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
 351     spec_chars = {}
 352     for line in fp.readlines():
 353         if line[0] != '#':
 354             line=line.replace(' "',' ') # remove all quotation marks with spaces before
 355             line=line.replace('" ',' ') # remove all quotation marks with spaces after
 356             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
 357             try:
 358                 # flag1 and flag2 are preamble and other flags
 359                 [ucs4,command,flag1,flag2] =line.split(None,3)
 360                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
 361             except:
 362                 pass
 363     fp.close()
 364     return spec_chars
 365
 366
 367 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
 368     # Define strings to start and end ERT and math insets
 369     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
 370     ert_outro='\n\\end_layout\n\n\\end_inset\n'
 371     math_intro='\n\\begin_inset Formula $'
 372     math_outro='$\n\\end_inset'
 373
 374     mod_line = u''
 375     if i and not is_inset_line(document, i-1):
 376         last_char = document.body[i - 1][-1:]
 377     else:
 378         last_char = ''
 379
 380     line = document.body[i]
 381     for character in line:
 382         try:
 383             # Try to write the character
 384             dummy = character.encode(document.encoding)
 385             mod_line += character
 386             last_char = character
 387         except:
 388             # Try to replace with ERT/math inset
 389             if character in spec_chars:
 390                 command = spec_chars[character][0] # the command to replace unicode
 391                 flag1 = spec_chars[character][1]
 392                 flag2 = spec_chars[character][2]
 393                 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
 394                     # We have a character that should be combined with the previous
 395                     command += '{' + last_char + '}'
 396                     # Remove the last character. Ignore if it is whitespace
 397                     if len(last_char.rstrip()):
 398                         # last_char was found and is not whitespace
 399                         if mod_line:
 400                             mod_line = mod_line[:-1]
 401                         else: # last_char belongs to the last line
 402                             document.body[i-1] = document.body[i-1][:-1]
 403                     else:
 404                         # The last character was replaced by a command. For now it is
 405                         # ignored. This could be handled better.
 406                         pass
 407                 if command[0:2] == '\\\\':
 408                     if command[2:12]=='ensuremath':
 409                         if insets and insets[-1] == "ERT":
 410                             # math in ERT
 411                             command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
 412                             command = command.replace('}', '$\n')
 413                         elif not insets or insets[-1] != "Formula":
 414                             # add a math inset with the replacement character
 415                             command = command.replace('\\\\ensuremath{\\', math_intro)
 416                             command = command.replace('}', math_outro)
 417                         else:
 418                             # we are already in a math inset
 419                             command = command.replace('\\\\ensuremath{\\', '')
 420                             command = command.replace('}', '')
 421                     else:
 422                         if insets and insets[-1] == "Formula":
 423                             # avoid putting an ERT in a math; instead put command as text
 424                             command = command.replace('\\\\', '\mathrm{')
 425                             command = command + '}'
 426                         elif not insets or insets[-1] != "ERT":
 427                             # add an ERT inset with the replacement character
 428                             command = command.replace('\\\\', '\n\\backslash\n')
 429                             command = ert_intro + command + ert_outro
 430                         else:
 431                             command = command.replace('\\\\', '\n\\backslash\n')
 432                     last_char = '' # indicate that the character should not be removed
 433                 mod_line += command
 434             else:
 435                 # Replace with replacement string
 436                 mod_line += replacement_character
 437     return mod_line
 438
 439
 440 def revert_unicode(document):
 441     '''Transform unicode characters that can not be written using the
 442 document encoding to commands according to the unicodesymbols
 443 file. Characters that can not be replaced by commands are replaced by
 444 an replacement string.  Flags other than 'combined' are currently not
 445 implemented.'''
 446     spec_chars = read_unicodesymbols()
 447     insets = [] # list of active insets
 448
 449     # Go through the document to capture all combining characters
 450     i = 0
 451     while i < len(document.body):
 452         line = document.body[i]
 453         # Check for insets
 454         if line.find('\\begin_inset') > -1:
 455             insets.append(line[13:].split()[0])
 456         if line.find('\\end_inset') > -1:
 457             del insets[-1]
 458
 459         # Try to write the line
 460         try:
 461             # If all goes well the line is written here
 462             dummy = line.encode(document.encoding)
 463             i += 1
 464         except:
 465             # Error, some character(s) in the line need to be replaced
 466             mod_line = revert_unicode_line(document, i, insets, spec_chars)
 467             document.body[i:i+1] = mod_line.split('\n')
 468             i += len(mod_line.split('\n'))
 469
 470
 471 def revert_cs_label(document):
 472     " Remove status flag of charstyle label. "
 473     i = 0
 474     while 1:
 475         i = find_token(document.body, "\\begin_inset CharStyle", i)
 476         if i == -1:
 477             return
 478         # Seach for a line starting 'show_label'
 479         # If it is not there, break with a warning message
 480         i = i + 1
 481         while 1:
 482             if (document.body[i][:10] == "show_label"):
 483                 del document.body[i]
 484                 break
 485             elif (document.body[i][:13] == "\\begin_layout"):
 486                 document.warning("Malformed LyX document: Missing 'show_label'.")
 487                 break
 488             i = i + 1
 489
 490         i = i + 1
 491
 492
 493 def convert_bibitem(document):
 494     """ Convert
 495 \bibitem [option]{argument}
 496
 497 to
 498
 499 \begin_inset LatexCommand bibitem
 500 label "option"
 501 key "argument"
 502
 503 \end_inset
 504
 505 This must be called after convert_commandparams.
 506 """
 507     i = 0
 508     while 1:
 509         i = find_token(document.body, "\\bibitem", i)
 510         if i == -1:
 511             break
 512         j = document.body[i].find('[') + 1
 513         k = document.body[i].rfind(']')
 514         if j == 0: # No optional argument found
 515             option = None
 516         else:
 517             option = document.body[i][j:k]
 518         j = document.body[i].rfind('{') + 1
 519         k = document.body[i].rfind('}')
 520         argument = document.body[i][j:k]
 521         lines = ['\\begin_inset LatexCommand bibitem']
 522         if option != None:
 523             lines.append('label "%s"' % option.replace('"', '\\"'))
 524         lines.append('key "%s"' % argument.replace('"', '\\"'))
 525         lines.append('')
 526         lines.append('\\end_inset')
 527         document.body[i:i+1] = lines
 528         i = i + 1
 529
 530
 531 commandparams_info = {
 532     # command : [option1, option2, argument]
 533     "bibitem" : ["label", "", "key"],
 534     "bibtex" : ["options", "btprint", "bibfiles"],
 535     "cite"        : ["after", "before", "key"],
 536     "citet"       : ["after", "before", "key"],
 537     "citep"       : ["after", "before", "key"],
 538     "citealt"     : ["after", "before", "key"],
 539     "citealp"     : ["after", "before", "key"],
 540     "citeauthor"  : ["after", "before", "key"],
 541     "citeyear"    : ["after", "before", "key"],
 542     "citeyearpar" : ["after", "before", "key"],
 543     "citet*"      : ["after", "before", "key"],
 544     "citep*"      : ["after", "before", "key"],
 545     "citealt*"    : ["after", "before", "key"],
 546     "citealp*"    : ["after", "before", "key"],
 547     "citeauthor*" : ["after", "before", "key"],
 548     "Citet"       : ["after", "before", "key"],
 549     "Citep"       : ["after", "before", "key"],
 550     "Citealt"     : ["after", "before", "key"],
 551     "Citealp"     : ["after", "before", "key"],
 552     "Citeauthor"  : ["after", "before", "key"],
 553     "Citet*"      : ["after", "before", "key"],
 554     "Citep*"      : ["after", "before", "key"],
 555     "Citealt*"    : ["after", "before", "key"],
 556     "Citealp*"    : ["after", "before", "key"],
 557     "Citeauthor*" : ["after", "before", "key"],
 558     "citefield"   : ["after", "before", "key"],
 559     "citetitle"   : ["after", "before", "key"],
 560     "cite*"       : ["after", "before", "key"],
 561     "hfill" : ["", "", ""],
 562     "index"      : ["", "", "name"],
 563     "printindex" : ["", "", "name"],
 564     "label" : ["", "", "name"],
 565     "eqref"     : ["name", "", "reference"],
 566     "pageref"   : ["name", "", "reference"],
 567     "prettyref" : ["name", "", "reference"],
 568     "ref"       : ["name", "", "reference"],
 569     "vpageref"  : ["name", "", "reference"],
 570     "vref"      : ["name", "", "reference"],
 571     "tableofcontents" : ["", "", "type"],
 572     "htmlurl" : ["name", "", "target"],
 573     "url"     : ["name", "", "target"]}
 574
 575
def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command; they are taken
 from commandparams_info.  Parameters that are empty are not written, and
 backslashes and double quotes in the values are escaped with a backslash.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # Everything after "\begin_inset LatexCommand " (26 characters).
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            # The command may continue on the following lines: join them
            # and remove them from the body.
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        # Parser state: "WS" (between parts), "CMDNAME", "OPTION",
        # "SECOPTION" or "CONTENT".
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        # Previous character; tells a second option "][" from a first one.
        b = 0
        for c in command:
            # The command name ends at a space or at the start of the
            # options or of the argument.
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            # A closing delimiter ends the current part, unless it closes a
            # nested pair.
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            # Collect the character into the part that is being read, or
            # pick the next state from the delimiter that was found.
            if state == "CMDNAME":
                    name += c
            elif state == "OPTION":
                    option1 += c
            elif state == "SECOPTION":
                    option2 += c
            elif state == "CONTENT":
                    argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    # A '[' directly after a ']' starts the second option.
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        # NOTE(review): a command name that is not in commandparams_info
        # raises KeyError below as soon as it has a non-empty parameter.
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
 679
 680
 681 def revert_commandparams(document):
 682     regex = re.compile(r'(\S+)\s+(.+)')
 683     i = 0
 684     while 1:
 685         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 686         if i == -1:
 687             break
 688         name = document.body[i].split()[2]
 689         j = find_end_of_inset(document.body, i)
 690         preview_line = ""
 691         option1 = ""
 692         option2 = ""
 693         argument = ""
 694         for k in range(i + 1, j):
 695             match = re.match(regex, document.body[k])
 696             if match:
 697                 pname = match.group(1)
 698                 pvalue = match.group(2)
 699                 if pname == "preview":
 700                     preview_line = document.body[k]
 701                 elif (commandparams_info[name][0] != "" and
 702                       pname == commandparams_info[name][0]):
 703                     option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
 704                 elif (commandparams_info[name][1] != "" and
 705                       pname == commandparams_info[name][1]):
 706                     option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
 707                 elif (commandparams_info[name][2] != "" and
 708                       pname == commandparams_info[name][2]):
 709                     argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
 710             elif document.body[k].strip() != "":
 711                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 712         if name == "bibitem":
 713             if option1 == "":
 714                 lines = ["\\bibitem {%s}" % argument]
 715             else:
 716                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 717         else:
 718             if option1 == "":
 719                 if option2 == "":
 720                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 721                 else:
 722                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 723             else:
 724                 if option2 == "":
 725                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 726                 else:
 727                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 728         if name != "bibitem":
 729             if preview_line != "":
 730                 lines.append(preview_line)
 731             lines.append('')
 732             lines.append('\\end_inset')
 733         document.body[i:j+1] = lines
 734         i += len(lines) + 1
 735
 736
 737 def revert_nomenclature(document):
 738     " Convert nomenclature entry to ERT. "
 739     regex = re.compile(r'(\S+)\s+(.+)')
 740     i = 0
 741     use_nomencl = 0
 742     while 1:
 743         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 744         if i == -1:
 745             break
 746         use_nomencl = 1
 747         j = find_end_of_inset(document.body, i + 1)
 748         preview_line = ""
 749         symbol = ""
 750         description = ""
 751         prefix = ""
 752         for k in range(i + 1, j):
 753             match = re.match(regex, document.body[k])
 754             if match:
 755                 name = match.group(1)
 756                 value = match.group(2)
 757                 if name == "preview":
 758                     preview_line = document.body[k]
 759                 elif name == "symbol":
 760                     symbol = value.strip('"').replace('\\"', '"')
 761                 elif name == "description":
 762                     description = value.strip('"').replace('\\"', '"')
 763                 elif name == "prefix":
 764                     prefix = value.strip('"').replace('\\"', '"')
 765             elif document.body[k].strip() != "":
 766                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 767         if prefix == "":
 768             command = 'nomenclature{%s}{%s}' % (symbol, description)
 769         else:
 770             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 771         document.body[i:j+1] = ['\\begin_inset ERT',
 772                                 'status collapsed',
 773                                 '',
 774                                 '\\begin_layout %s' % document.default_layout,
 775                                 '',
 776                                 '',
 777                                 '\\backslash',
 778                                 command,
 779                                 '\\end_layout',
 780                                 '',
 781                                 '\\end_inset']
 782         i = i + 11
 783     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 784         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 785         document.preamble.append('\\makenomenclature')
 786
 787
 788 def revert_printnomenclature(document):
 789     " Convert printnomenclature to ERT. "
 790     regex = re.compile(r'(\S+)\s+(.+)')
 791     i = 0
 792     use_nomencl = 0
 793     while 1:
 794         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 795         if i == -1:
 796             break
 797         use_nomencl = 1
 798         j = find_end_of_inset(document.body, i + 1)
 799         preview_line = ""
 800         labelwidth = ""
 801         for k in range(i + 1, j):
 802             match = re.match(regex, document.body[k])
 803             if match:
 804                 name = match.group(1)
 805                 value = match.group(2)
 806                 if name == "preview":
 807                     preview_line = document.body[k]
 808                 elif name == "labelwidth":
 809                     labelwidth = value.strip('"').replace('\\"', '"')
 810             elif document.body[k].strip() != "":
 811                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 812         if labelwidth == "":
 813             command = 'nomenclature{}'
 814         else:
 815             command = 'nomenclature[%s]' % labelwidth
 816         document.body[i:j+1] = ['\\begin_inset ERT',
 817                                 'status collapsed',
 818                                 '',
 819                                 '\\begin_layout %s' % document.default_layout,
 820                                 '',
 821                                 '',
 822                                 '\\backslash',
 823                                 command,
 824                                 '\\end_layout',
 825                                 '',
 826                                 '\\end_inset']
 827         i = i + 11
 828     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 829         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 830         document.preamble.append('\\makenomenclature')
 831
 832
 833 def convert_esint(document):
 834     " Add \\use_esint setting to header. "
 835     i = find_token(document.header, "\\cite_engine", 0)
 836     if i == -1:
 837         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 838         return
 839     # 0 is off, 1 is auto, 2 is on.
 840     document.header.insert(i, '\\use_esint 0')
 841
 842
 843 def revert_esint(document):
 844     " Remove \\use_esint setting from header. "
 845     i = find_token(document.header, "\\use_esint", 0)
 846     if i == -1:
 847         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 848         return
 849     use_esint = document.header[i].split()[1]
 850     del document.header[i]
 851     # 0 is off, 1 is auto, 2 is on.
 852     if (use_esint == 2):
 853         document.preamble.append('\\usepackage{esint}')
 854
 855
 856 def revert_clearpage(document):
 857     " clearpage -> ERT "
 858     i = 0
 859     while 1:
 860         i = find_token(document.body, "\\clearpage", i)
 861         if i == -1:
 862             break
 863         document.body[i:i+1] =  ['\\begin_inset ERT',
 864                                 'status collapsed',
 865                                 '',
 866                                 '\\begin_layout %s' % document.default_layout,
 867                                 '',
 868                                 '',
 869                                 '\\backslash',
 870                                 'clearpage',
 871                                 '\\end_layout',
 872                                 '',
 873                                 '\\end_inset']
 874     i = i + 1
 875
 876
 877 def revert_cleardoublepage(document):
 878     " cleardoublepage -> ERT "
 879     i = 0
 880     while 1:
 881         i = find_token(document.body, "\\cleardoublepage", i)
 882         if i == -1:
 883             break
 884         document.body[i:i+1] =  ['\\begin_inset ERT',
 885                                 'status collapsed',
 886                                 '',
 887                                 '\\begin_layout %s' % document.default_layout,
 888                                 '',
 889                                 '',
 890                                 '\\backslash',
 891                                 'cleardoublepage',
 892                                 '\\end_layout',
 893                                 '',
 894                                 '\\end_inset']
 895     i = i + 1
 896
 897
 898 def convert_lyxline(document):
 899     " remove fontsize commands for \lyxline "
 900     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 901     # to change the line thickness. The new definiton does this so that imported
 902     # \lyxlines would have a different line thickness. The eventual fontsize command
 903     # before \lyxline is therefore removed to get the same output.
 904     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 905                  "large", "Large", "LARGE", "huge", "Huge"]
 906     for n in range(0, len(fontsizes)):
 907         i = 0
 908         k = 0
 909         while i < len(document.body):
 910             i = find_token(document.body, "\\size " + fontsizes[n], i)
 911             k = find_token(document.body, "\\lyxline", i)
 912             # the corresponding fontsize command is always 2 lines before the \lyxline
 913             if (i != -1 and k == i+2):
 914                 document.body[i:i+1] = []
 915             else:
 916                 break
 917         i = i + 1
 918
 919
 920 def revert_encodings(document):
 921     " Set new encodings to auto. "
 922     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 923                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 924                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 925     i = find_token(document.header, "\\inputencoding", 0)
 926     if i == -1:
 927         document.header.append("\\inputencoding auto")
 928     else:
 929         inputenc = get_value(document.header, "\\inputencoding", i)
 930         if inputenc in encodings:
 931             document.header[i] = "\\inputencoding auto"
 932     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 933
 934
 935 def convert_caption(document):
 936     " Convert caption layouts to caption insets. "
 937     i = 0
 938     while 1:
 939         i = find_token(document.body, "\\begin_layout Caption", i)
 940         if i == -1:
 941             return
 942         j = find_end_of_layout(document.body, i)
 943         if j == -1:
 944             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 945             return
 946
 947         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 948         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 949                             "\\begin_inset Caption", "",
 950                             "\\begin_layout %s" % document.default_layout]
 951         i = i + 1
 952
 953
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    # For every `\begin_inset Caption' line in the body the inset is
    # dissolved: the inset start becomes `\begin_layout Caption', and the
    # inner `\begin_layout' line and the `\end_inset' line are removed.
    # The function returns when no inset is left, or after a warning if
    # one of the expected lines cannot be found.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the line: it is needed again below if the paragraph
        # continues after the inset.
        layout_line = document.body[layout_before]
        # The inset is at the paragraph start if there are only empty lines
        # between the \begin_layout line and the inset.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Remove the \begin_layout line and the empty lines; the inset
            # line moves up to index layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # Close the running paragraph in front of the inset; the inset
            # line moves down by the two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is at the paragraph end if there are only empty lines
        # between the \end_inset line and the next \end_layout line.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            # Reopen the paragraph with the layout line remembered above
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions are done from back (k) to front (j), so the smaller
        # indices j and i are still valid when they are used.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1026
1027
# Accents of InsetLaTeXAccent: the key is the name of the accent command,
# the value is the corresponding unicode combining character.
accent_map = {
    '`': u'\u0300',  # grave
    "'": u'\u0301',  # acute
    '^': u'\u0302',  # circumflex
    '~': u'\u0303',  # tilde
    '=': u'\u0304',  # macron
    'u': u'\u0306',  # breve
    '.': u'\u0307',  # dot above
    '"': u'\u0308',  # diaeresis
    'r': u'\u030a',  # ring above
    'H': u'\u030b',  # double acute
    'v': u'\u030c',  # caron
    'b': u'\u0320',  # minus sign below
    'd': u'\u0323',  # dot below
    'c': u'\u0327',  # cedilla
    'k': u'\u0328',  # ogonek
    # The tie is special: It spans two characters, but only one is given
    # as argument, so we don't need to treat it differently.
    't': u'\u0361',  # tie
}


# Special accents of InsetLaTeXAccent that are used without an argument:
# the value is a complete character, not a combining one.
special_accent_map = {
    "i": u'\u0131',  # dotless i
    "j": u'\u0237',  # dotless j
    "l": u'\u0142',  # l with stroke
    "L": u'\u0141',  # L with stroke
}


# Special arguments of InsetLaTeXAccent: commands that may be given as the
# accented character, and the character they stand for.
accented_map = {
    "\\i": u'\u0131',  # dotless i
    "\\j": u'\u0237',  # dotless j
}
1065
1066
def _convert_accent(accent, accented_char):
    """Return the unicode text for an accent command and its argument.

    accent is the name of the accent command (without backslash) and
    accented_char its argument, which may be empty.  An empty string is
    returned if the combination cannot be converted.
    """
    kind, base = accent, accented_char
    if base == '':
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = ' '
    elif kind == 'q' and base in ('t', 'd', 'l', 'L'):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = 'v'
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # We can only convert accents on a single char
        return ''
    combining = accent_map.get(kind)
    if not combining:
        # unknown accent
        return ''
    return unicodedata.normalize("NFC", "%s%s" % (base, combining))
1092
1093
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append the text ert to body[i] as valid ERT code.

    A backslash is written as '\backslash ' and followed by a new line,
    a '\n' starts a new paragraph in default_layout, everything else is
    appended unchanged.  The index of the last line written to is returned
    (it is larger than i if lines were added).
    """
    new_paragraph = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
    for char in ert:
        if char == '\n':
            body[i+1:i+1] = new_paragraph
            i += len(new_paragraph)
        elif char == '\\':
            body[i] += '\\backslash '
            i += 1
            body.insert(i, '')
        else:
            body[i] += char
    return i
1110
1111
def convert_accent(document):
    """Convert InsetLaTeXAccent to unicode characters.

    Every body line containing an accent inset (`\\i ' followed by the
    accent command) is examined.  If the accent can be converted by
    _convert_accent() the inset is replaced by the resulting text,
    otherwise a warning is issued and the inset is converted to ERT.
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a'  (also accepted)
    # '\i \"'    (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument has none of the supported forms (e.g. it
                # contains more than one brace group).  Keep it as it is,
                # but drop the leading backslash, since one is added again
                # when the ERT is written below.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # The ERT text is appended to the last (empty) line inserted above
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
1161
1162
def is_inset_line(document, i):
    """ Line i of body has an inset

    This is assumed if the line starts with a backslash, or if one of its
    last two words contains a backslash.
    """
    line = document.body[i]
    if line.startswith('\\'):
        return True
    trailing_words = line.split()[-2:]
    return '\\' in "".join(trailing_words)
1169
1170
1171 # A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Return text normalized to the given unicode normal form.

    Works like unicodedata.normalize(), but OHM SIGN and ANGSTROM SIGN are
    copied unchanged; the text between them is normalized piecewise.
    """
    # do not normalize OHM, ANGSTROM
    keep_characters = (0x2126, 0x212b)
    pieces = []
    pending = ''
    for char in text:
        if ord(char) not in keep_characters:
            pending += char
            continue
        # Flush what was collected so far, then copy the kept character
        if pending:
            pieces.append(unicodedata.normalize(form, pending))
            pending = ''
        pieces.append(char)
    if pending:
        pieces.append(unicodedata.normalize(form, pending))
    return ''.join(pieces)
1188
1189
def revert_accent(document):
    """Convert accented characters back to InsetLaTeXAccent.

    The body is decomposed (NFD), and every accented or special character
    that cannot be encoded in the encoding of the current line is replaced
    by an accent inset (`\\i \\<accent>{<char>}').  Finally the body is
    composed (NFC) again.
    """
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")

    i = 0
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            # These lines contain no text, so the next line is processed
            # right away.  The index must be advanced before `continue',
            # otherwise the same line would be examined forever.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i = i + 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i = i + 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i = i + 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    # Only a test whether the character can be encoded
                    accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The rest of the line is handled as the next line
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    # Only a test whether the character can be encoded
                    normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    # The rest of the line is handled as the next line
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
1294
1295
def normalize_font_whitespace_259(document):
    """ Move whitespace at the start or end of a font change outside.

    Before format 259 the font changes were ignored if a whitespace was
    the first or last character in the sequence.  The properties handled
    here are listed below together with their default values."""

    defaults = [("\\series", "default"),
                ("\\emph", "default"),
                ("\\color", "none"),
                ("\\shape", "default"),
                ("\\bar", "default"),
                ("\\family", "default")]
    char_properties = dict(defaults)
    return normalize_font_whitespace(document, char_properties)
1308
def normalize_font_whitespace_274(document):
    """ Move whitespace at the start or end of a language change outside.

    Before format 259 (sic) the font changes were ignored if a whitespace
    was the first or last character in the sequence. This was corrected
    for most font properties in format 259, but the language was forgotten
    then. The same conversion is applied here for font language changes."""

    language_only = {}
    language_only["\\lang"] = "default"
    return normalize_font_whitespace(document, language_only)
1319
def get_paragraph_language(document, i):
    """ Return the language of the paragraph that contains line i of the
    document body. This is the language given by a \\lang command if that
    is the first thing in the paragraph, and the language of the document
    otherwise."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first_nonempty_line = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_nonempty_line].split()

    starts_with_lang = len(tokens) > 1 and tokens[0] == "\\lang"
    if not starts_with_lang:
        return document.language
    return tokens[1]
1337
1338 def normalize_font_whitespace(document, char_properties):
1339     """ Before format 259 the font changes were ignored if a
1340     whitespace was the first or last character in the sequence, this function
1341     transfers the whitespace outside. Only a change in one of the properties
1342     in the provided     char_properties is handled by this function."""
1343
1344     if document.backend != "latex":
1345         return
1346
1347     lines = document.body
1348
1349     changes = {}
1350
1351     i = 0
1352     while i < len(lines):
1353         words = lines[i].split()
1354
1355         if len(words) > 0 and words[0] == "\\begin_layout":
1356             # a new paragraph resets all font changes
1357             changes.clear()
1358             # also reset the default language to be the paragraph's language
1359             if "\\lang" in list(char_properties.keys()):
1360                 char_properties["\\lang"] = \
1361                     get_paragraph_language(document, i + 1)
1362
1363         elif len(words) > 1 and words[0] in list(char_properties.keys()):
1364             # we have a font change
1365             if char_properties[words[0]] == words[1]:
1366                 # property gets reset
1367                 if words[0] in list(changes.keys()):
1368                     del changes[words[0]]
1369                 defaultproperty = True
1370             else:
1371                 # property gets set
1372                 changes[words[0]] = words[1]
1373                 defaultproperty = False
1374
1375             # We need to explicitly reset all changed properties if we find
1376             # a space below, because LyX 1.4 would output the space after
1377             # closing the previous change and before starting the new one,
1378             # and closing a font change means to close all properties, not
1379             # just the changed one.
1380
1381             if lines[i-1] and lines[i-1][-1] == " ":
1382                 lines[i-1] = lines[i-1][:-1]
1383                 # a space before the font change
1384                 added_lines = [" "]
1385                 for k in list(changes.keys()):
1386                     # exclude property k because that is already in lines[i]
1387                     if k != words[0]:
1388                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1389                 for k in list(changes.keys()):
1390                     # exclude property k because that must be added below anyway
1391                     if k != words[0]:
1392                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1393                 if defaultproperty:
1394                     # Property is reset in lines[i], so add the new stuff afterwards
1395                     lines[i+1:i+1] = added_lines
1396                 else:
1397                     # Reset property for the space
1398                     added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1399                     lines[i:i] = added_lines
1400                 i = i + len(added_lines)
1401
1402             elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1403                 # a space after the font change
1404                 if (lines[i+1] == " " and lines[i+2]):
1405                     next_words = lines[i+2].split()
1406                     if len(next_words) > 0 and next_words[0] == words[0]:
1407                         # a single blank with a property different from the
1408                         # previous and the next line must not be changed
1409                         i = i + 2
1410                         continue
1411                 lines[i+1] = lines[i+1][1:]
1412                 added_lines = [" "]
1413                 for k in list(changes.keys()):
1414                     # exclude property k because that is already in lines[i]
1415                     if k != words[0]:
1416                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1417                 for k in list(changes.keys()):
1418                     # exclude property k because that must be added below anyway
1419                     if k != words[0]:
1420                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1421                 # Reset property for the space
1422                 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1423                 lines[i:i] = added_lines
1424                 i = i + len(added_lines)
1425
1426         i = i + 1
1427
1428
def revert_utf8x(document):
    """Replace the ``utf8x`` input encoding by plain ``utf8``.

    A header without an ``\\inputencoding`` line gets
    ``\\inputencoding auto`` appended.  ``document.inputencoding`` is
    refreshed from the header in every case.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "utf8x":
        header[pos] = "\\inputencoding utf8"
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1439
1440
def revert_utf8plain(document):
    """Replace the ``utf8-plain`` input encoding by plain ``utf8``.

    When the header has no ``\\inputencoding`` entry at all,
    ``\\inputencoding auto`` is appended.  Afterwards
    ``document.inputencoding`` is re-read from the header.
    """
    hdr = document.header
    where = find_token(hdr, "\\inputencoding", 0)
    if where != -1:
        if get_value(hdr, "\\inputencoding", where) == "utf8-plain":
            hdr[where] = "\\inputencoding utf8"
    else:
        hdr.append("\\inputencoding auto")
    document.inputencoding = get_value(hdr, "\\inputencoding", 0)
1451
1452
def revert_beamer_alert(document):
    """Revert beamer's \\alert inset back to ERT.

    Every ``\\begin_inset CharStyle Alert`` line becomes
    ``\\begin_inset ERT`` and the line that follows the next
    ``\\begin_layout`` is wrapped in ``\\alert{...}``.

    If the body ends before such a layout (with a following line) is
    found, a warning is emitted and the function returns instead of
    running past the end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i = i + 1
        # Scan for the layout line, but stop while there is still a line
        # after it that can take the command.
        while i + 1 < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find layout of Alert inset.")
            return
        # Insert the \alert command
        body[i + 1] = "\\alert{" + body[i + 1] + '}'
        i = i + 1
1470
1471
def revert_beamer_structure(document):
    """Revert beamer's \\structure inset back to ERT.

    Every ``\\begin_inset CharStyle Structure`` line becomes
    ``\\begin_inset ERT`` and the line that follows the next
    ``\\begin_layout`` is wrapped in ``\\structure{...}``.

    If the body ends before such a layout (with a following line) is
    found, a warning is emitted and the function returns instead of
    running past the end of the body.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i = i + 1
        # Scan for the layout line, but stop while there is still a line
        # after it that can take the command.
        while i + 1 < len(body) and body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find layout of Structure inset.")
            return
        # Insert the \structure command
        body[i + 1] = "\\structure{" + body[i + 1] + '}'
        i = i + 1
1488
1489
def convert_changes(document):
    """Switch ``\\output_changes`` off when ``\\tracking_changes`` is off.

    Warns and leaves the header untouched if either header entry is
    missing.
    """
    header = document.header

    tracking_pos = find_token(header, '\\tracking_changes', 0)
    if tracking_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return

    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return

    tracking = get_value(header, "\\tracking_changes", tracking_pos)
    output = get_value(header, "\\output_changes", output_pos)
    if (tracking, output) == ("false", "true"):
        header[output_pos] = "\\output_changes false"
1504
1505
def revert_ascii(document):
    """Replace the ``ascii`` input encoding by ``auto``.

    A header lacking ``\\inputencoding`` gets ``\\inputencoding auto``
    appended.  ``document.inputencoding`` is then re-read from the header.
    """
    token = "\\inputencoding"
    header = document.header
    idx = find_token(header, token, 0)
    if idx == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, token, idx) == "ascii":
        header[idx] = "\\inputencoding auto"
    document.inputencoding = get_value(header, token, 0)
1516
1517
def normalize_language_name(document):
    """Rename the languages ``brazil`` and ``portuges``.

    ``document.language`` is mapped to ``brazilian`` / ``portuguese`` and
    the ``\\language`` header line, if there is one, is rewritten to match.
    Documents in any other language are left untouched.
    """
    lang = { "brazil": "brazilian",
             "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # find_token yields -1 when the line is missing; indexing with it
        # would overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
1526
1527
def revert_language_name(document):
    """Rename the languages ``brazilian`` and ``portuguese`` back.

    ``document.language`` is mapped to ``brazil`` / ``portuges`` and the
    ``\\language`` header line, if there is one, is rewritten to match.
    Documents in any other language are left untouched.
    """
    lang = { "brazilian": "brazil",
             "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # find_token yields -1 when the line is missing; indexing with it
        # would overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
1536
1537 #
1538 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """Rename the text class ``cv`` to ``simplecv``."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1542
1543
def revert_cv_textclass(document):
    """Rename the text class ``simplecv`` back to ``cv``."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1547
1548
1549 #
1550 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation parameter to graphics insets.

    The parameter is inserted just before the end of every Graphics inset
    that has a rotateAngle line together with a width, height or scale
    line.  Insets whose end cannot be located are reported and skipped.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # do not search or insert with an invalid end index
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(body, "\trotateAngle", i + 1, j)
        l = find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if k != -1 and l != -1:
            # Write the parameter with the same leading tab as the other
            # parameter lines; revert_graphics_rotation looks for
            # "\tscaleBeforeRotation".
            body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
1569
1570
1571 #
1572 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation parameter from graphics insets.

    For every Graphics inset:

    * if it has a scaleBeforeRotation line, that line is deleted;
    * otherwise, if it has a rotateAngle value together with a width,
      height or scale line, the angle is moved into the ``special``
      parameter (``angle=<value>``) and the rotateAngle line is deleted.

    Insets that need no change, or whose end cannot be located, are
    skipped and the scan continues with the next inset.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # do not search or insert with an invalid end index
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del body[k]
        else:
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(body, 'rotateAngle', i + 1, j)
            special = get_value(body, 'special', i + 1, j)
            scaled = find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j) != -1
            # An unscaled inset needs no change, but later insets still
            # have to be processed, so the loop must not be left here.
            if rotateAngle != "" and scaled:
                if special == "":
                    body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                else:
                    l = find_token(body, "\tspecial", i + 1, j)
                    if l != -1:
                        body[l] = body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                k = find_token(body, "\trotateAngle", i + 1, j)
                if k != -1:
                    del body[k]
        i = i + 1
1606
1607
1608
def convert_tableborder(document):
    """Drop the "|" in front of ">{" in lines that set leftline="true".

    LyX doubled the table cell border because it ignored the "|" character
    in the cell arguments.  A fix takes care of this, so the "|" has to be
    removed.  Both tokens have to be on the same line; only the first
    "|>{" of such a line is changed.
    """
    for idx, line in enumerate(document.body):
        if 'leftline="true"' not in line:
            continue
        bar = line.find("|>{")
        if bar == -1:
            continue
        # delete the "|"
        document.body[idx] = line[:bar] + line[bar + 1:]
1621
1622
def revert_tableborder(document):
    """Put a "|" in front of the first ">{" in lines that set leftline="true".

    Both tokens have to be on the same line.
    """
    for idx, line in enumerate(document.body):
        cut = line.find(">{")
        if cut != -1 and 'leftline="true"' in line:
            # add the "|"
            document.body[idx] = "|".join((line[:cut], line[cut:]))
1633
1634
def revert_armenian(document):
    """Revert Armenian support.

    * The input encoding ``armscii8`` is replaced by ``auto`` in the header.
    * For documents in Armenian, ``\\usepackage{armtex}`` is added to the
      preamble and the document language is switched to English.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"

    if document.language != "armenian":
        return

    # the preamble counts as existing if any of its lines holds a
    # backslash or a percent sign
    has_preamble = any("\\" in line or "%" in line
                       for line in document.preamble)
    if has_preamble:
        # set the armtex entry as the first preamble line
        document.preamble.insert(0, "\\usepackage{armtex}")
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
1665
1666
def revert_CJK(document):
    """Set CJK encodings to default and the languages chinese, japanese
    and korean to english.

    A header without ``\\inputencoding`` gets ``\\inputencoding auto``
    appended; ``document.inputencoding`` is re-read from the header.
    """
    cjk_encodings = ("Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP")
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")

    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) in cjk_encodings:
        header[pos] = "\\inputencoding default"
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language not in cjk_languages:
        return
    document.language = "english"
    pos = find_token(header, "\\language", 0)
    if pos != -1:
        header[pos] = "\\language english"
1687
1688
def revert_preamble_listings_params(document):
    """Revert the header option \\listings_params to preamble code.

    The ``\\listings_params`` header line is removed and its value is
    appended to the preamble as ``\\lstset{...}``, after
    ``\\usepackage{listings}`` (added only if not already present).
    Nothing happens when the header has no such line.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    if '\\usepackage{listings}' not in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    # Take everything after the token, so that a quoted value containing
    # blanks is not cut at the first blank; a missing value yields "".
    fields = document.header[i].split(None, 1)
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    else:
        params = ''
    document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1696
1697
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are passed on as caption={...} and
label={...} listings parameters.  Caption and label are only searched
inside the listings inset that is being converted.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # k is the first line of the inset contents: the line after the
        # last setting line seen (after the inset header if there is none)
        k = i + 1
        # the settings are in the first three lines; never read past the
        # end of the inset
        for n in range(i + 1, min(i + 4, j)):
            setting = document.body[n]
            if setting.startswith('inline'):
                inline = setting.split()[1]
                k = n + 1
            if setting.startswith('lstparams'):
                # keep the whole quoted value, it may contain blanks
                params = setting[len('lstparams'):].strip().strip('"')
                k = n + 1
            if setting.startswith('status'):
                status = setting.split()[1].strip()
                k = n + 1
        # caption?
        caption = ''
        label = ''
        # search inside this inset only: a caption further down in the
        # document belongs to something else
        cap = find_token(document.body, '\\begin_inset Caption', i, j)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label? (only inside the caption)
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for text in document.body[lbl : lbl_end + 1]:
                if text.startswith('name '):
                    label = text.split()[1].strip('"')
                    break
            # the caption is all non-command text around the label inset
            for text in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not text.startswith('\\'):
                    caption += text.strip()
            k = cap_end + 1
        # looking for the oneline code for lstinline: the last line of the
        # first default layout of this inset
        inlinecode = ''
        layout = find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1, j)
        if layout != -1:
            layout_end = find_end_of_layout(document.body, layout + 1)
            if layout_end != -1:
                inlinecode = document.body[layout_end - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout',
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1847
1848
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

An inset whose command cannot be parsed is reported and left unchanged.
    '''
    # \lstinputlisting{file}opt -- the file name is everything up to the
    # first closing brace
    command_re = re.compile(r'\\(lstinputlisting){([^}]*)}(.*)')

    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]: everything
        # after "\begin_inset Include", so that blanks in the file name or
        # in the options do not cut it short
        command = document.body[i].split(None, 2)[2].strip()
        match = command_re.match(command)
        if not match:
            # do not replace the inset by an empty command
            document.warning("Malformed LyX document: Could not parse lstinputlisting include.")
            i = i + 1
            continue
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        cmd, filename, option = match.groups()
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout %s' % document.default_layout,
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1899
1900
def revert_ext_font_sizes(document):
    """Move the font sizes 10, 11 and 12 of the ext* classes to the class options.

    Only acts on latex documents whose text class name starts with "ext".
    ``\\paperfontsize`` is reset to ``default`` and ``<size>pt`` is added
    to the ``\\options`` line, which is created right after the
    ``\\textclass`` line if it does not exist.
    """
    if document.backend != "latex":
        return
    if not document.textclass.startswith("ext"):
        return

    header = document.header
    size = get_value(header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    size_option = size + 'pt'

    size_pos = find_token(header, '\\paperfontsize', 0)
    header[size_pos] = '\\paperfontsize default'

    options_pos = find_token(header, '\\options', 0)
    if options_pos != -1:
        header[options_pos] += ',%s' % size_option
        return
    insert_at = find_token(header, '\\textclass', 0) + 1
    header[insert_at:insert_at] = ['\\options %s' % size_option]
1918
1919
def convert_ext_font_sizes(document):
    """Move a 10pt, 11pt or 12pt class option of the ext* classes to
    ``\\paperfontsize``.

    Only acts on latex documents whose text class name starts with "ext",
    whose ``\\paperfontsize`` is ``default`` and whose ``\\options`` line
    holds one of the three sizes as a separate comma-delimited option.
    The first such option is removed; an ``\\options`` line left empty is
    deleted.
    """
    if document.backend != "latex":
        return
    if not document.textclass.startswith("ext"):
        return

    header = document.header
    if get_value(header, '\\paperfontsize', 0) != 'default':
        return

    options_pos = find_token(header, '\\options', 0)
    if options_pos == -1:
        return

    fontsizes = ('10pt', '11pt', '12pt')
    options = get_value(header, '\\options', options_pos).split(',')
    hits = [n for n, opt in enumerate(options) if opt in fontsizes]
    if not hits:
        return
    first = hits[0]
    # strip the trailing "pt"
    fontsize = options[first][:-2]
    del options[first]

    size_pos = find_token(header, '\\paperfontsize', 0)
    header[size_pos] = '\\paperfontsize %s' % fontsize

    if options:
        header[options_pos] = '\\options %s' % ','.join(options)
    else:
        del header[options_pos]
1955
1956
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

    '''

    body = document.body
    start = 0
    while True:
        start = find_token(body, r'\begin_layout --Separator--', start)
        if start == -1:
            break
        end = find_end_of_layout(body, start + 1)
        if end == -1:
            # this should not happen
            break
        layout_line = r'\begin_layout %s' % document.default_layout
        # a default layout opening with a note that marks the separator
        note = [layout_line,
                r'\begin_inset Note Note',
                'status open',
                '',
                layout_line,
                'Separate Environment',
                r'\end_layout',
                '',
                r'\end_inset']
        # the former contents of the separator layout follow the note
        contents = body[start + 1 : end]
        body[start : end + 1] = note + contents + ['', r'\end_layout']
2004
2005
def convert_arabic (document):
    """Rename the language ``arabic`` to ``arabic_arabtex``.

    The document language and its ``\\language`` header line are updated,
    and every body line containing ``\\lang arabic`` is replaced by
    ``\\lang arabic_arabtex``.
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    # The backslash is escaped explicitly: "\l" is not a valid escape
    # sequence.
    for n, line in enumerate(document.body):
        if "\\lang arabic" in line:
            # change the language name
            document.body[n] = "\\lang arabic_arabtex"
2019
2020
def revert_arabic (document):
    """Rename the language ``arabic_arabtex`` back to ``arabic``.

    The document language and its ``\\language`` header line are updated,
    and every body line containing ``\\lang arabic_arabtex`` is replaced
    by ``\\lang arabic``.
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    # The backslash is escaped explicitly: "\l" is not a valid escape
    # sequence.
    for n, line in enumerate(document.body):
        if "\\lang arabic_arabtex" in line:
            # change the language name
            document.body[n] = "\\lang arabic"
2034
2035
2036 ##
2037 # Conversion hub
2038 #
2039
# LyX version strings associated with this module.
supported_versions = ["1.5.0","1.5"]
# Conversion table: each entry is [file format number, [functions]], listed
# in ascending format order.  Presumably the functions of an entry are run
# to bring a document up to that format -- confirm against the lyx2lyx
# driver.  An empty list means the step needs no change to the file.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Reversion table: same entry layout as above, listed in descending format
# order.  Presumably the functions of an entry are run to bring a document
# down to that format -- confirm against the lyx2lyx driver.
# NOTE(review): the three listings reverters appear under both 271 and 268;
# looks intentional, verify against the file format history.
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


# Running this file directly does nothing.
if __name__ == "__main__":
    pass