1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 """ Convert files to the file format generated by lyx 1.5"""
import os
import re
import sys
import unicodedata

from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from LyX import get_encoding

# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
    # Python 3 has no unichr; chr is the equivalent.
    unichr = chr
# End of code to support for both python 2 and 3
38 ####################################################################
39 # Private helper functions
def find_end_of_inset(lines, i):
    " Find end of inset, where lines[i] is included."
    begin_tag = "\\begin_inset"
    end_tag = "\\end_inset"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_end_of_layout(lines, i):
    " Find end of layout, where lines[i] is included."
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_beginning_of_layout(lines, i):
    "Find beginning of layout, where lines[i] is included."
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_beginning_of(lines, i, begin_tag, end_tag)
53 # End of helper functions
54 ####################################################################
58 # Notes: Framed/Shaded
def revert_framed(document):
    "Revert framed notes. "
    # Framed and Shaded notes do not exist before format 248; both fold
    # back into the plain Note inset.
    i = 0
    while 1:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
# Maps from the old \fontscheme names to the new per-family font names.
# All three maps share the same key set; revert_font_settings relies on
# that when it searches for a scheme matching the three family values.
roman_fonts = {'default' : 'default', 'ae'       : 'ae',
               'times'   : 'times',   'palatino' : 'palatino',
               'helvet'  : 'default', 'avant'    : 'default',
               'newcent' : 'newcent', 'bookman'  : 'bookman',
               'pslatex' : 'times'}
sans_fonts = {'default' : 'default', 'ae'       : 'default',
              'times'   : 'default', 'palatino' : 'default',
              'helvet'  : 'helvet',  'avant'    : 'avant',
              'newcent' : 'default', 'bookman'  : 'default',
              'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}
def convert_font_settings(document):
    " Convert \\fontscheme header setting to the individual \\font_* settings. "
    i = 0
    i = find_token_exact(document.header, "\\fontscheme", i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        # NOTE(review): without the token there is no sane insertion point;
        # give up on this (malformed) header.
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if not font_scheme in list(roman_fonts.keys()):
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # Replace the single \fontscheme line by the new per-family settings.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    """ Revert the individual \\font_* header settings to \\fontscheme.

    Reads and removes \\font_roman/\\font_sans/\\font_typewriter,
    \\font_default_family, \\font_sc, \\font_osf, \\font_sf_scale and
    \\font_tt_scale, then inserts a matching \\fontscheme line; settings
    with no old equivalent are emulated via preamble code.
    """
    i = 0
    insert_line = -1
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
        if i == -1:
            document.warning("Malformed LyX document: Missing `%s'." % name)
            i = 0
        else:
            # Remember where the first font setting was so that the
            # \fontscheme line can be inserted at the same place.
            if (insert_line < 0):
                insert_line = i
            fonts[family] = get_value(document.header, name, i, i + 1)
            del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_default_family'.")
        font_default_family = 'default'
    else:
        font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sc', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sc'.")
        font_sc = 'false'
    else:
        font_sc = get_value(document.header, '\\font_sc', i, i + 1)
        del document.header[i]
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_osf'.")
        font_osf = 'false'
    else:
        font_osf = get_value(document.header, '\\font_osf', i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
        font_sf_scale = '100'
    else:
        font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
        del document.header[i]
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
        font_tt_scale = '100'
    else:
        font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
        del document.header[i]
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # Try to find a scheme that matches all three families exactly.
    for font_scheme in list(roman_fonts.keys()):
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return
    # No exact scheme: fall back to 'default' and emulate via the preamble.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    for font in 'berasans':
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    " We remove the booktabs flag or everything else will become a mess. "
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    # Hoisted out of the loop; the pattern is constant.
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Advance past the broken inset start so we cannot loop forever.
            i = i + 1
            continue
        for k in range(i, j):
            if re_features.search(document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re.search(re_row, document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
        i = i + 1
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.

    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the locale
    encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != '':
        return
    encoding_stack = [document.encoding]
    insets = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward: # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()
    if document.inputencoding == "auto" or document.inputencoding == "default":
        i = 0
        while i < len(document.body):
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                # Footnotes and notes are encoded in the encoding of the
                # main document language, not of the surrounding text.
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                else:
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                if inset_result:
                    insets.append(inset_result.group(1))
                else:
                    insets.append("")
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                del insets[-1]
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    try:
                        # Convert unicode to the 8bit string that will be written
                        # to the file with the correct encoding.
                        orig = document.body[i].encode(encoding_stack[-1])
                        # Convert the 8bit string that will be written to the
                        # file to fake unicode with the encoding that will later
                        # be used when writing to the file.
                        document.body[i] = orig.decode(document.encoding)
                    except:
                        # A character cannot be represented in the target
                        # encoding: replace it with ERT/math via the
                        # unicodesymbols table.
                        mod_line = revert_unicode_line(document, i, insets, spec_chars)
                        document.body[i:i+1] = mod_line.split('\n')
                        i += len(mod_line.split('\n')) - 1
            i += 1
def convert_utf8(document):
    " Set document encoding to UTF-8. "
    # First re-decode the body, then record the new encoding.
    forward = True
    convert_multiencoding(document, forward)
    document.encoding = "utf8"
def revert_utf8(document):
    " Set document encoding to the value corresponding to inputencoding. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    # 248 is the target (old) file format for the encoding lookup.
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    convert_multiencoding(document, False)
# FIXME: Use the version in unicode_symbols.py which has some bug fixes
def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip('lyx2lyx') strips *characters*, not the suffix;
    # kept for compatibility (see FIXME above).
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    spec_chars = {}
    for line in fp.readlines():
        if line != '' and line[0] != '#':
            line=line.replace(' "',' ') # remove all quotation marks with spaces before
            line=line.replace('" ',' ') # remove all quotation marks with spaces after
            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
            try:
                # flag1 and flag2 are preamble and other flags
                [ucs4,command,flag1,flag2] =line.split(None,3)
                # NOTE: eval() of trusted, LyX-shipped data only.
                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
            except:
                # There are also normal comments and empty lines in the file.
                pass
    fp.close()
    return spec_chars
def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
    """ Rewrite body line i so that it can be written in document.encoding.

    Characters that cannot be encoded are replaced by an ERT or math inset
    (per the unicodesymbols table in spec_chars), or by
    replacement_character when no command is known. Returns the modified
    line as one '\\n'-joined string.
    """
    # Define strings to start and end ERT and math insets
    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
    ert_outro='\n\\end_layout\n\n\\end_inset\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset'

    mod_line = u''
    if i and not is_inset_line(document, i-1):
        last_char = document.body[i - 1][-1:]
    else:
        last_char = ''

    line = document.body[i]
    for character in line:
        try:
            # Try to write the character
            dummy = character.encode(document.encoding)
            mod_line += character
            last_char = character
        except:
            # Try to replace with ERT/math inset
            if character in spec_chars:
                command = spec_chars[character][0] # the command to replace unicode
                flag1 = spec_chars[character][1]
                flag2 = spec_chars[character][2]
                if flag1.find('combining') > -1 or flag2.find('combining') > -1:
                    # We have a character that should be combined with the previous
                    command += '{' + last_char + '}'
                    # Remove the last character. Ignore if it is whitespace
                    if len(last_char.rstrip()):
                        # last_char was found and is not whitespace
                        if mod_line:
                            mod_line = mod_line[:-1]
                        else: # last_char belongs to the last line
                            document.body[i-1] = document.body[i-1][:-1]
                    else:
                        # The last character was replaced by a command. For now it is
                        # ignored. This could be handled better.
                        pass
                if command[0:2] == '\\\\':
                    if command[2:12]=='ensuremath':
                        if insets and insets[-1] == "ERT":
                            # math in ERT
                            command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                            command = command.replace('}', '$\n')
                        elif not insets or insets[-1] != "Formula":
                            # add a math inset with the replacement character
                            command = command.replace('\\\\ensuremath{\\', math_intro)
                            command = command.replace('}', math_outro)
                        else:
                            # we are already in a math inset
                            command = command.replace('\\\\ensuremath{\\', '')
                            command = command.replace('}', '')
                    else:
                        if insets and insets[-1] == "Formula":
                            # avoid putting an ERT in a math; instead put command as text
                            command = command.replace('\\\\', '\mathrm{')
                            command = command + '}'
                        elif not insets or insets[-1] != "ERT":
                            # add an ERT inset with the replacement character
                            command = command.replace('\\\\', '\n\\backslash\n')
                            command = ert_intro + command + ert_outro
                        else:
                            command = command.replace('\\\\', '\n\\backslash\n')
                    last_char = '' # indicate that the character should not be removed
                mod_line += command
            else:
                # Replace with replacement string
                mod_line += replacement_character
    return mod_line
def revert_unicode(document):
    '''Transform unicode characters that can not be written using the
    document encoding to commands according to the unicodesymbols
    file. Characters that can not be replaced by commands are replaced by
    an replacement string. Flags other than 'combined' are currently not
    implemented.'''
    spec_chars = read_unicodesymbols()
    insets = [] # list of active insets

    # Go through the document to capture all combining characters
    i = 0
    while i < len(document.body):
        line = document.body[i]
        # Track the inset nesting so replacements know their context.
        if line.find('\\begin_inset') > -1:
            insets.append(line[13:].split()[0])
        if line.find('\\end_inset') > -1:
            del insets[-1]
        # Try to write the line
        try:
            # If all goes well the line is written here
            dummy = line.encode(document.encoding)
            i += 1
        except:
            # Error, some character(s) in the line need to be replaced
            mod_line = revert_unicode_line(document, i, insets, spec_chars)
            document.body[i:i+1] = mod_line.split('\n')
            i += len(mod_line.split('\n'))
def revert_cs_label(document):
    " Remove status flag of charstyle label. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Seach for a line starting 'show_label'
        # If it is not there, break with a warning message
        i = i + 1
        while 1:
            if (document.body[i][:10] == "show_label"):
                del document.body[i]
                break
            elif (document.body[i][:13] == "\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        i = i + 1
def convert_bibitem(document):
    r""" Convert
    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"

    \end_inset

    This must be called after convert_commandparams.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        j = document.body[i].find('[') + 1
        k = document.body[i].rfind(']')
        if j == 0: # No optional argument found
            option = None
        else:
            option = document.body[i][j:k]
        j = document.body[i].rfind('{') + 1
        k = document.body[i].rfind('}')
        argument = document.body[i][j:k]
        lines = ['\\begin_inset LatexCommand bibitem']
        if option != None:
            lines.append('label "%s"' % option.replace('"', '\\"'))
        lines.append('key "%s"' % argument.replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1
# command : [option1, option2, argument]
# All natbib-style citation commands share the same parameter names, so
# generate those entries instead of spelling each one out.
_cite_commands = (
    "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
    "citeyear", "citeyearpar",
    "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
    "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
    "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
    "citefield", "citetitle", "cite*",
)

commandparams_info = {}
for _cmd in _cite_commands:
    commandparams_info[_cmd] = ["after", "before", "key"]
commandparams_info.update({
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "eqref"           : ["name", "", "reference"],
    "pageref"         : ["name", "", "reference"],
    "prettyref"       : ["name", "", "reference"],
    "ref"             : ["name", "", "reference"],
    "vpageref"        : ["name", "", "reference"],
    "vref"            : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"]})
def convert_commandparams(document):
    r""" Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
    \end_inset

    to

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    name1, name2 and name3 can be different for each command.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue
        # The command may continue on the following lines up to \end_inset.
        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        b = 0
        for c in command:
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            if state == "CMDNAME":
                name += c
            elif state == "OPTION":
                option1 += c
            elif state == "SECOPTION":
                option2 += c
            elif state == "CONTENT":
                argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
def revert_commandparams(document):
    " Revert LatexCommand insets to the old one-line [opt1][opt2]{arg} form. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        name = document.body[i].split()[2]
        j = find_end_of_inset(document.body, i)
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            # bibitem reverts to a plain \bibitem line, not to an inset.
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        i = i + 1
def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        labelwidth = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # NOTE(review): these literals look like they should emit
        # 'printnomenclature'; kept as found — confirm against reference.
        if labelwidth == "":
            command = 'nomenclature{}'
        else:
            command = 'nomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    " Add \\use_esint setting to header. "
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    " Remove \\use_esint setting from header. "
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.  Compare as a string: split() yields str.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    " clearpage -> ERT "
    i = 0
    while 1:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'clearpage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    i = 0
    while 1:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'cleardoublepage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def convert_lyxline(document):
    r" remove fontsize commands for \lyxline "
    # The problematic is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definiton does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for n in range(0, len(fontsizes)):
        i = 0
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                document.body[i:i+1] = []
            else:
                break
            i = i + 1
def revert_encodings(document):
    " Set new encodings to auto. "
    # Encodings that did not exist before format 248.
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    " Convert caption layouts to caption insets. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_layout Caption", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # Close the inset before the old \end_layout, then wrap the start.
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
        i = i + 1
def revert_caption(document):
    """ Convert caption insets to caption layouts.

    This assumes that the text class has a caption style.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return
        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        layout_line = document.body[layout_before]
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1027 # Accents of InsetLaTeXAccent
1029 "`" : u'\u0300', # grave
1030 "'" : u'\u0301', # acute
1031 "^" : u'\u0302', # circumflex
1032 "~" : u'\u0303', # tilde
1033 "=" : u'\u0304', # macron
1034 "u" : u'\u0306', # breve
1035 "." : u'\u0307', # dot above
1036 "\"": u'\u0308', # diaeresis
1037 "r" : u'\u030a', # ring above
1038 "H" : u'\u030b', # double acute
1039 "v" : u'\u030c', # caron
1040 "b" : u'\u0320', # minus sign below
1041 "d" : u'\u0323', # dot below
1042 "c" : u'\u0327', # cedilla
1043 "k" : u'\u0328', # ogonek
1044 "t" : u'\u0361' # tie. This is special: It spans two characters, but
1045 # only one is given as argument, so we don't need to
1046 # treat it differently.
1050 # special accents of InsetLaTeXAccent without argument
1051 special_accent_map = {
1052 'i' : u'\u0131', # dotless i
1053 'j' : u'\u0237', # dotless j
1054 'l' : u'\u0142', # l with stroke
1055 'L' : u'\u0141' # L with stroke
1059 # special accent arguments of InsetLaTeXAccent
1061 '\\i' : u'\u0131', # dotless i
1062 '\\j' : u'\u0237' # dotless j
1066 def _convert_accent(accent, accented_char):
1068 char = accented_char
1070 if type in special_accent_map:
1071 return special_accent_map[type]
1072 # a missing char is treated as space by LyX
1074 elif type == 'q' and char in ['t', 'd', 'l', 'L']:
1075 # Special caron, only used with t, d, l and L.
1076 # It is not in the map because we convert it to the same unicode
1077 # character as the normal caron: \q{} is only defined if babel with
1078 # the czech or slovak language is used, and the normal caron
1079 # produces the correct output if the T1 font encoding is used.
1080 # For the same reason we never convert to \q{} in the other direction.
1082 elif char in accented_map:
1083 char = accented_map[char]
1084 elif (len(char) > 1):
1085 # We can only convert accents on a single char
1087 a = accent_map.get(type)
1089 return unicodedata.normalize("NFC", "%s%s" % (char, a))
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Convert backslashes and '\n' in `ert` into valid ERT code.

    Appends the converted text to body[i] and returns the (possibly
    incremented) line index i.  A backslash becomes '\backslash ' and
    starts a fresh body line; a newline closes the current layout and
    opens a new `default_layout` paragraph.
    """
    for c in ert:
        if c == '\\':
            body[i] = body[i] + '\\backslash '
            i = i + 1
            body.insert(i, '')
        elif c == '\n':
            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
            i = i + 4
        else:
            body[i] = body[i] + c
    return i
def convert_accent(document):
    # Convert InsetLaTeXAccent commands in the body to literal Unicode
    # characters, falling back to ERT for unknown accents.
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    # NOTE(review): the scan-loop scaffolding ('i = 0' / 'while True:') is
    # missing from this copy — verify against upstream lyx_1_5.py.
    i = find_re(document.body, re_wholeinset, i)
    # NOTE(review): an 'if i == -1: return' guard is missing here.
    match = re_wholeinset.match(document.body[i])
    prefix = match.group(1)
    contents = match.group(3).strip()
    match = re_contents.match(contents)
    # NOTE(review): an 'if match:' guard appears to be missing here.
    # Strip first char (always \)
    accent = match.group(1)[1:]
    accented_contents = match.group(2).strip()
    match = re_accentedcontents.match(accented_contents)
    accented_char = match.group(1)
    converted = _convert_accent(accent, accented_char)
    # NOTE(review): an "if converted != '':" branch header is missing here.
    # Normalize contents
    # NOTE(review): the trailing comma makes `contents` a 1-tuple; it is only
    # used in the warning below — confirm this is intended.
    contents = '%s{%s}' % (accent, accented_char),
    document.body[i] = '%s%s' % (prefix, converted)
    # Fallback: keep the prefix and wrap the original command in ERT.
    document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
    document.body[i] = prefix
    document.body[i+1:i+1] = ['\\begin_inset ERT',
    # NOTE(review): the ERT boilerplate items of this list are missing here.
    '\\begin_layout %s' % document.default_layout,
    # i + 7 skips over the ERT boilerplate that was just inserted.
    i = convert_ertbackslash(document.body, i + 7,
    # NOTE(review): the ert-text argument line is missing from this copy.
    document.default_layout)
    document.body[i+1:i+1] = ['\\end_layout',
def is_inset_line(document, i):
    """Return True if line i of the document body contains an inset.

    A line beginning with a backslash is a LyX command line; otherwise an
    inline inset shows up as a backslash within the last two
    whitespace-separated tokens of the line.
    """
    if document.body[i][:1] == '\\':
        return True
    last_tokens = "".join(document.body[i].split()[-2:])
    return last_tokens.find('\\') != -1
# A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Unicode-normalize `text` to `form`, but keep OHM SIGN (U+2126) and
    ANGSTROM SIGN (U+212B) untouched (plain normalization would fold them
    onto the Greek/Latin letters)."""
    # do not normalize OHM, ANGSTROM
    keep_characters = [0x2126, 0x212b]
    result = ''
    convert = ''
    for i in text:
        if ord(i) in keep_characters:
            # flush the pending run, then copy the protected char verbatim
            if len(convert) > 0:
                result = result + unicodedata.normalize(form, convert)
                convert = ''
            result = result + i
        else:
            convert = convert + i
    if len(convert) > 0:
        result = result + unicodedata.normalize(form, convert)
    return result
def revert_accent(document):
    # Replace accented Unicode characters that the target encoding cannot
    # represent with InsetLaTeXAccent commands.
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            # NOTE(review): this guard's body (presumably 'continue') is
            # missing from this copy.
        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        # NOTE(review): the 'try:' line is missing from this copy.
        document.body[i] = normalize("NFD", document.body[i])
        # NOTE(review): the 'except ...:' line is missing from this copy.
        document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    # NOTE(review): an 'i = 0' initialisation is missing here in this copy.
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            # NOTE(review): an 'if result:' guard appears to be missing here.
            language = result.group(1)
            if language == "default":
                encoding_stack[-1] = document.encoding
            # NOTE(review): an 'else:' branch header is missing here.
            from lyx2lyx_lang import lang
            encoding_stack[-1] = lang[language][3]
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            encoding_stack.append(encoding_stack[-1])
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            del encoding_stack[-1]

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                # NOTE(review): a 'try:' line is missing before the encode.
                dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # NOTE(review): a 'break' appears to be missing here.
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    # NOTE(review): this branch's body is missing from this copy.
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                # NOTE(review): a 'try:' line is missing before the encode.
                dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
        # NOTE(review): the outer loop's 'i = i + 1' is missing from this copy.

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Every non-language font property handled in format 259, with its
    # default ("reset") value.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """ Before format 259 (sic) the font changes were ignored if a
    whitespace was the first or last character in the sequence. This was
    corrected for most font properties in format 259, but the language
    was forgotten then. This function applies the same conversion done
    there (namely, transfers the whitespace outside) for font language
    changes, as well."""
    # Only the \lang property needs fixing at format 274.
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        return words[1]
    else:
        return document.language
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""

    if document.backend != "latex":
        # NOTE(review): this guard's body (presumably 'return') is missing
        # from this copy.

    lines = document.body

    # NOTE(review): initialisations are missing from this copy (presumably
    # 'changes = {}' and 'i = 0') — verify against upstream lyx_1_5.py.
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            # NOTE(review): the reset statement (e.g. 'changes.clear()') is
            # missing from this copy.
            # also reset the default language to be the paragraph's language
            if "\\lang" in list(char_properties.keys()):
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in list(char_properties.keys()):
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in list(changes.keys()):
                    del changes[words[0]]
                defaultproperty = True
            # NOTE(review): the 'else:' branch header is missing here.
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # NOTE(review): the 'added_lines' initialisation is missing
                # from this copy.
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # NOTE(review): an 'if defaultproperty:' guard is missing here.
                # Property is reset in lines[i], so add the new stuff afterwards
                lines[i+1:i+1] = added_lines
                # NOTE(review): the 'else:' branch header is missing here.
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        # NOTE(review): the skip statements (advance and
                        # 'continue') are missing from this copy.
                lines[i+1] = lines[i+1][1:]
                # NOTE(review): the 'added_lines' initialisation is missing.
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)
        # NOTE(review): the outer loop's 'i = i + 1' is missing from this copy.
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    " Revert beamer's \\alert inset back to ERT. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # scan forward to the inset's first layout line and wrap its
        # contents in the \alert command
        while True:
            if (document.body[i][:13] == "\\begin_layout"):
                # Insert the \alert command
                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def revert_beamer_structure(document):
    " Revert beamer's \\structure inset back to ERT. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # scan forward to the inset's first layout line and wrap its
        # contents in the \structure command
        while True:
            if (document.body[i][:13] == "\\begin_layout"):
                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # outputting changes while not tracking them is inconsistent: disable it
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    " Rename the old language names brazil/portuges to their new spellings. "
    renames = {"brazil": "brazilian",
               "portuges": "portuguese"}
    new_name = renames.get(document.language)
    if new_name is not None:
        document.language = new_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % new_name
def revert_language_name(document):
    " Rename brazilian/portuguese back to their old names brazil/portuges. "
    renames = {"brazilian": "brazil",
               "portuguese": "portuges"}
    old_name = renames.get(document.language)
    if old_name is not None:
        document.language = old_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % old_name
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " The 'cv' document class was renamed to 'simplecv'. "
    renames = {"cv": "simplecv"}
    if document.textclass in renames:
        document.textclass = renames[document.textclass]
def revert_cv_textclass(document):
    " Rename the 'simplecv' document class back to 'cv'. "
    renames = {"simplecv": "cv"}
    if document.textclass in renames:
        document.textclass = renames[document.textclass]
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            document.body.insert(j, 'scaleBeforeRotation')
        i = i + 1
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    " remove scaleBeforeRotation graphics parameter. "
    # NOTE(review): the scan-loop scaffolding ('i = 0' / 'while True:') is
    # missing from this copy — verify against upstream lyx_1_5.py.
    i = find_token(document.body, "\\begin_inset Graphics", i)
    # NOTE(review): an 'if i == -1: return' guard is missing here.
    j = find_end_of_inset(document.body, i + 1)
    # NOTE(review): an 'if j == -1:' guard is missing before the warning.
    document.warning("Malformed LyX document: Could not find end of graphics inset.")
    # If there's a scaleBeforeRotation param, just remove that
    k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
    # NOTE(review): an 'if k != -1:' guard is missing here.
    del document.body[k]
    # NOTE(review): the 'else:' branch header is missing here.
    # if not, and if we have rotateAngle and width or height or scale,
    # we have to put the rotateAngle value to special
    rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
    special = get_value(document.body, 'special', i + 1, j)
    if rotateAngle != "":
        k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        # NOTE(review): guard lines are missing here (presumably a check on k
        # and an 'if special == "":' branch header).
        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
        # NOTE(review): the 'else:' branch header is missing here.
        l = find_token(document.body, "\tspecial", i + 1, j)
        document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        # NOTE(review): an 'if k != -1:' guard is missing here.
        del document.body[k]
def convert_tableborder(document):
    " Remove the duplicate '|' from table cell special arguments. "
    # The problem is: LyX doubles the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # delete the "|"
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])]
        i = i + 1
def revert_tableborder(document):
    " Re-add the '|' to table cell special arguments (inverse of convert). "
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # add the "|"
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        i = i + 1
def revert_armenian(document):
    # Revert Armenian support: switch armscii8 back to auto, load armtex in
    # the preamble and fall back to English as document language.

    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        # NOTE(review): an 'if i != -1:' guard appears to be missing here.
        document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    # NOTE(review): initialisations are missing from this copy (presumably
    # 'i = 0' and 'k = -1') — verify against upstream lyx_1_5.py.
    while i < len(document.preamble):
        # NOTE(review): 'if k == -1:' guards around the two find calls are
        # missing from this copy.
        k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        k = document.preamble[i].find("%", 0, len(document.preamble[i]))
        # NOTE(review): the loop's 'i = i + 1' is missing from this copy.
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        # NOTE(review): an 'if k != -1:' guard appears to be missing here.
        document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        # NOTE(review): the 'else:' branch header is missing here.
        document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        # NOTE(review): an 'if i != -1:' guard appears to be missing here.
        document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    " Revert preamble option \\listings_params "
    i = find_token(document.header, "\\listings_params", 0)
    if i != -1:
        document.preamble.append('\\usepackage{listings}')
        # the header value is quoted: strip the quotes for \lstset
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
(docstring example fragments; most of the example is missing from this copy)
lstparams "language=Delphi"
\begin_layout Standard
\begin_layout Standard
lstinline[language=Delphi]{var i = 10;}
There can be an caption inset in this inset
\begin_layout Standard
\begin_inset Caption
\begin_layout Standard
\begin_inset LatexCommand label
'''
    # NOTE(review): the docstring closer above was restored; the loop
    # scaffolding ('i = 0' / 'while True:') is missing from this copy.
    i = find_token(document.body, '\\begin_inset listings', i)
    # NOTE(review): an 'if i == -1: break' guard is missing here.
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # NOTE(review): defaults for inline/params/status are missing here.
    for line in range(i + 1, i + 4):
        if document.body[line].startswith('inline'):
            inline = document.body[line].split()[1]
        if document.body[line].startswith('lstparams'):
            params = document.body[line].split()[1].strip('"')
        if document.body[line].startswith('status'):
            status = document.body[line].split()[1].strip()
    # NOTE(review): caption/label initialisations are missing here.
    cap = find_token(document.body, '\\begin_inset Caption', i)
    # NOTE(review): an 'if cap != -1:' guard appears to be missing here.
    cap_end = find_end_of_inset(document.body, cap + 1)
    # this should not happen
    lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
    # NOTE(review): an 'if lbl != -1:' guard appears to be missing here.
    lbl_end = find_end_of_inset(document.body, lbl + 1)
    # this should not happen
    for line in document.body[lbl : lbl_end + 1]:
        if line.startswith('name '):
            label = line.split()[1].strip('"')
    for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
        if not line.startswith('\\'):
            caption += line.strip()
    # NOTE(review): deletion of the caption inset lines is missing here.
    # looking for the oneline code for lstinline
    inlinecode = document.body[find_end_of_layout(document.body,
        find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
    if len(caption) > 0:
        if len(params) == 0:
            params = 'caption={%s}' % caption
        # NOTE(review): the 'else:' branch header is missing here.
            params += ',caption={%s}' % caption
    # NOTE(review): an 'if label:' guard appears to be missing here.
        if len(params) == 0:
            params = 'label={%s}' % label
        # NOTE(review): the 'else:' branch header is missing here.
            params += ',label={%s}' % label
    # NOTE(review): an 'if params:' guard appears to be missing here.
        params = '[%s]' % params
        params = params.replace('\\', '\\backslash\n')
    if inline == 'true':
        document.body[i:(j+1)] = [r'\begin_inset ERT',
                                  'status %s' % status,
                                  r'\begin_layout %s' % document.default_layout,
                                  # NOTE(review): ERT boilerplate items missing
                                  'lstinline%s{%s}' % (params, inlinecode),
    # NOTE(review): the 'else:' branch and a 'k = ...' line are missing here.
        document.body[i: j+1] = [r'\begin_inset ERT',
                                 'status %s' % status,
                                 r'\begin_layout %s' % document.default_layout,
                                 r'begin{lstlisting}%s' % params,
                                 r'\begin_layout %s' % document.default_layout,
                                 ] + document.body[k : j - 1] + \
                                 # NOTE(review): trailing list items missing
                                 r'\begin_layout %s' % document.default_layout,
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
\begin_layout Standard
lstinputlisting{file}[opt]
'''
    # NOTE(review): the docstring closer above was restored; the loop
    # scaffolding ('i = 0' / 'while True:') is missing from this copy.
    i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
    # NOTE(review): an 'if i == -1: break' guard is missing here.
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # find command line lstinputlisting{file}[options]
    cmd, file, option = '', '', ''
    if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
        cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
    option = option.replace('\\', '\\backslash\n')
    document.body[i : j + 1] = [r'\begin_inset ERT',
                                # NOTE(review): ERT boilerplate items missing
                                r'\begin_layout %s' % document.default_layout,
                                '%s%s{%s}' % (cmd, option, file),
                                # NOTE(review): closing list items missing
def revert_ext_font_sizes(document):
    " Move an explicit 10/11/12 font size of an ext* class into \\options. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # no \options line yet: insert one right after \textclass
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    " Move a 10pt/11pt/12pt class option of an ext* class to \\paperfontsize. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    if i == -1: return

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            break
    else: # this else will only be attained if the for cycle had no match
        return

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # strip the trailing 'pt'
            fontsize = opt[:-2]
            break
    else:
        return

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    del options[j]
    if options:
        document.header[i] = '\\options %s' % ','.join(options)
    else:
        del document.header[i]
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
(docstring example fragments; most of the example is missing from this copy)
\begin_layout --Separator--
\begin_layout Standard
\begin_inset Note Note
\begin_layout Standard
'''
    # NOTE(review): the docstring closer above was restored; the loop
    # scaffolding ('i = 0' / 'while True:') is missing from this copy.
    i = find_token(document.body, r'\begin_layout --Separator--', i)
    # NOTE(review): an 'if i == -1: break' guard is missing here.
    j = find_end_of_layout(document.body, i + 1)
    # this should not happen
    document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                r'\begin_inset Note Note',
                                # NOTE(review): note boilerplate items missing
                                r'\begin_layout %s' % document.default_layout,
                                'Separate Environment',
                                # NOTE(review): closing items missing here
                                ] # NOTE(review): list closer position unverified
    document.body[ i + 1 : j] + \
def convert_arabic (document):
    " Rename the arabic language to arabic_arabtex. "
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            document.body[i] = '\lang arabic_arabtex'
        i = i + 1
def revert_arabic (document):
    " Rename the arabic_arabtex language back to arabic. "
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            document.body[i] = '\lang arabic'
        i = i + 1
# Conversion hub: each row maps a target file-format number to the list of
# functions that convert a document to it (convert) or back from it (revert).
supported_versions = ["1.5.0","1.5"]
convert = [[246, []],
           [247, [convert_font_settings]],
           # NOTE(review): several rows of this table are missing from this
           # copy (the format numbers below are not consecutive).
           [249, [convert_utf8]],
           [252, [convert_commandparams, convert_bibitem]],
           [254, [convert_esint]],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [261, [convert_changes]],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [271, [convert_ext_font_sizes]],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
           # NOTE(review): the convert-list closer and the 'revert = ['
           # opener are missing from this copy.
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [272, [revert_separator_layout]],
           # NOTE(review): the 271 and 268 rows carry identical function
           # lists — possibly a transcription artifact; confirm upstream.
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [259, [revert_utf8x]],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]
2107 if __name__ == "__main__":