lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  27 from LyX import get_encoding
  28
  29
  30 ####################################################################
  31 # Private helper functions
  32
  33 def find_end_of_inset(lines, i):
  34     " Find end of inset, where lines[i] is included."
  35     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  36
  37 def find_end_of_layout(lines, i):
  38     " Find end of layout, where lines[i] is included."
  39     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  40
  41 def find_beginning_of_layout(lines, i):
  42     "Find beginning of layout, where lines[i] is included."
  43     return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  44
  45 # End of helper functions
  46 ####################################################################
  47
  48
  49 ##
  50 #  Notes: Framed/Shaded
  51 #
  52
  53 def revert_framed(document):
  54     "Revert framed notes. "
  55     i = 0
  56     while 1:
  57         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  58
  59         if i == -1:
  60             return
  61         document.body[i] = "\\begin_inset Note"
  62         i = i + 1
  63
  64
  65 ##
  66 #  Fonts
  67 #
  68
  69 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  70                     'times'   : 'times',   'palatino' : 'palatino',
  71                     'helvet'  : 'default', 'avant'    : 'default',
  72                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  73                     'pslatex' : 'times'}
  74 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'helvet',  'avant'    : 'avant',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'helvet'}
  79 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  80                     'times'   : 'default', 'palatino' : 'default',
  81                     'helvet'  : 'default', 'avant'    : 'default',
  82                     'newcent' : 'default', 'bookman'  : 'default',
  83                     'pslatex' : 'courier'}
  84
  85 def convert_font_settings(document):
  86     " Convert font settings. "
  87     i = 0
  88     i = find_token_exact(document.header, "\\fontscheme", i)
  89     if i == -1:
  90         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  91         return
  92     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  93     if font_scheme == '':
  94         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  95         font_scheme = 'default'
  96     if not font_scheme in roman_fonts.keys():
  97         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  98         font_scheme = 'default'
  99     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
 100                           '\\font_sans %s' % sans_fonts[font_scheme],
 101                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
 102                           '\\font_default_family default',
 103                           '\\font_sc false',
 104                           '\\font_osf false',
 105                           '\\font_sf_scale 100',
 106                           '\\font_tt_scale 100']
 107
 108
 109 def revert_font_settings(document):
 110     " Revert font settings. "
 111     i = 0
 112     insert_line = -1
 113     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 114     for family in 'roman', 'sans', 'typewriter':
 115         name = '\\font_%s' % family
 116         i = find_token_exact(document.header, name, i)
 117         if i == -1:
 118             document.warning("Malformed LyX document: Missing `%s'." % name)
 119             i = 0
 120         else:
 121             if (insert_line < 0):
 122                 insert_line = i
 123             fonts[family] = get_value(document.header, name, i, i + 1)
 124             del document.header[i]
 125     i = find_token_exact(document.header, '\\font_default_family', i)
 126     if i == -1:
 127         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 128         font_default_family = 'default'
 129     else:
 130         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 131         del document.header[i]
 132     i = find_token_exact(document.header, '\\font_sc', i)
 133     if i == -1:
 134         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 135         font_sc = 'false'
 136     else:
 137         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 138         del document.header[i]
 139     if font_sc != 'false':
 140         document.warning("Conversion of '\\font_sc' not yet implemented.")
 141     i = find_token_exact(document.header, '\\font_osf', i)
 142     if i == -1:
 143         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 144         font_osf = 'false'
 145     else:
 146         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 147         del document.header[i]
 148     i = find_token_exact(document.header, '\\font_sf_scale', i)
 149     if i == -1:
 150         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 151         font_sf_scale = '100'
 152     else:
 153         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 154         del document.header[i]
 155     if font_sf_scale != '100':
 156         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 157     i = find_token_exact(document.header, '\\font_tt_scale', i)
 158     if i == -1:
 159         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 160         font_tt_scale = '100'
 161     else:
 162         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 163         del document.header[i]
 164     if font_tt_scale != '100':
 165         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 166     for font_scheme in roman_fonts.keys():
 167         if (roman_fonts[font_scheme] == fonts['roman'] and
 168             sans_fonts[font_scheme] == fonts['sans'] and
 169             typewriter_fonts[font_scheme] == fonts['typewriter']):
 170             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 171             if font_default_family != 'default':
 172                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 173             if font_osf == 'true':
 174                 document.warning("Ignoring `\\font_osf = true'")
 175             return
 176     font_scheme = 'default'
 177     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 178     if fonts['roman'] == 'cmr':
 179         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 180         if font_osf == 'true':
 181             document.preamble.append('\\usepackage{eco}')
 182             font_osf = 'false'
 183     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 184         if fonts['roman'] == font:
 185             document.preamble.append('\\usepackage{%s}' % font)
 186     for font in 'cmss', 'lmss', 'cmbr':
 187         if fonts['sans'] == font:
 188             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 189     for font in 'berasans':
 190         if fonts['sans'] == font:
 191             document.preamble.append('\\usepackage{%s}' % font)
 192     for font in 'cmtt', 'lmtt', 'cmtl':
 193         if fonts['typewriter'] == font:
 194             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 195     for font in 'courier', 'beramono', 'luximono':
 196         if fonts['typewriter'] == font:
 197             document.preamble.append('\\usepackage{%s}' % font)
 198     if font_default_family != 'default':
 199         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 200     if font_osf == 'true':
 201         document.warning("Ignoring `\\font_osf = true'")
 202
 203
 204 def revert_booktabs(document):
 205     " We remove the booktabs flag or everything else will become a mess. "
 206     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 207     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 208     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 209     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 210     i = 0
 211     while 1:
 212         i = find_token(document.body, "\\begin_inset Tabular", i)
 213         if i == -1:
 214             return
 215         j = find_end_of_inset(document.body, i + 1)
 216         if j == -1:
 217             document.warning("Malformed LyX document: Could not find end of tabular.")
 218             continue
 219         for k in range(i, j):
 220             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 221                 document.warning("Converting 'booktabs' table to normal table.")
 222                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 223             if re.search(re_row, document.body[k]):
 224                 document.warning("Removing extra row space.")
 225                 document.body[k] = re_tspace.sub('', document.body[k])
 226                 document.body[k] = re_bspace.sub('', document.body[k])
 227                 document.body[k] = re_ispace.sub('', document.body[k])
 228         i = i + 1
 229
 230
 231 def convert_multiencoding(document, forward):
 232     """ Fix files with multiple encodings.
 233 Files with an inputencoding of "auto" or "default" and multiple languages
 234 where at least two languages have different default encodings are encoded
 235 in multiple encodings for file formats < 249. These files are incorrectly
 236 read and written (as if the whole file was in the encoding of the main
 237 language).
 238 This is not true for files written by CJK-LyX, they are always in the locale
 239 encoding.
 240
 241 This function
 242 - converts from fake unicode values to true unicode if forward is true, and
 243 - converts from true unicode values to fake unicode if forward is false.
 244 document.encoding must be set to the old value (format 248) in both cases.
 245
 246 We do this here and not in LyX.py because it is far easier to do the
 247 necessary parsing in modern formats than in ancient ones.
 248 """
 249     inset_types = ["Foot", "Note"]
 250     if document.cjk_encoding != '':
 251         return
 252     encoding_stack = [document.encoding]
 253     insets = []
 254     lang_re = re.compile(r"^\\lang\s(\S+)")
 255     inset_re = re.compile(r"^\\begin_inset\s(\S+)")
 256     if not forward: # no need to read file unless we are reverting
 257         spec_chars = read_unicodesymbols()
 258
 259     if document.inputencoding == "auto" or document.inputencoding == "default":
 260         i = 0
 261         while i < len(document.body):
 262             result = lang_re.match(document.body[i])
 263             if result:
 264                 language = result.group(1)
 265                 if language == "default":
 266                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 267                     encoding_stack[-1] = document.encoding
 268                 else:
 269                     from lyx2lyx_lang import lang
 270                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 271                     encoding_stack[-1] = lang[language][3]
 272             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 273                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 274                 if len(insets) > 0 and insets[-1] in inset_types:
 275                     from lyx2lyx_lang import lang
 276                     encoding_stack.append(lang[document.language][3])
 277                 else:
 278                     encoding_stack.append(encoding_stack[-1])
 279             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 280                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 281                 if len(encoding_stack) == 1:
 282                     # Don't remove the document encoding from the stack
 283                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 284                 else:
 285                     del encoding_stack[-1]
 286             elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
 287                 inset_result = inset_re.match(document.body[i])
 288                 if inset_result:
 289                     insets.append(inset_result.group(1))
 290                 else:
 291                     insets.append("")
 292             elif find_token(document.body, "\\end_inset", i, i + 1) == i:
 293                 del insets[-1]
 294             if encoding_stack[-1] != document.encoding:
 295                 if forward:
 296                     # This line has been incorrectly interpreted as if it was
 297                     # encoded in 'encoding'.
 298                     # Convert back to the 8bit string that was in the file.
 299                     orig = document.body[i].encode(document.encoding)
 300                     # Convert the 8bit string that was in the file to unicode
 301                     # with the correct encoding.
 302                     document.body[i] = orig.decode(encoding_stack[-1])
 303                 else:
 304                     try:
 305                         # Convert unicode to the 8bit string that will be written
 306                         # to the file with the correct encoding.
 307                         orig = document.body[i].encode(encoding_stack[-1])
 308                         # Convert the 8bit string that will be written to the
 309                         # file to fake unicode with the encoding that will later
 310                         # be used when writing to the file.
 311                         document.body[i] = orig.decode(document.encoding)
 312                     except:
 313                         mod_line = revert_unicode_line(document, i, insets, spec_chars)
 314                         document.body[i:i+1] = mod_line.split('\n')
 315                         i += len(mod_line.split('\n')) - 1
 316             i += 1
 317
 318
 319 def convert_utf8(document):
 320     " Set document encoding to UTF-8. "
 321     convert_multiencoding(document, True)
 322     document.encoding = "utf8"
 323
 324
 325 def revert_utf8(document):
 326     " Set document encoding to the value corresponding to inputencoding. "
 327     i = find_token(document.header, "\\inputencoding", 0)
 328     if i == -1:
 329         document.header.append("\\inputencoding auto")
 330     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 331         document.header[i] = "\\inputencoding auto"
 332     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 333     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 334     convert_multiencoding(document, False)
 335
 336
 337 def read_unicodesymbols():
 338     " Read the unicodesymbols list of unicode characters and corresponding commands."
 339     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 340     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
 341     spec_chars = {}
 342     for line in fp.readlines():
 343         if line[0] != '#':
 344             line=line.replace(' "',' ') # remove all quotation marks with spaces before
 345             line=line.replace('" ',' ') # remove all quotation marks with spaces after
 346             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
 347             try:
 348                 # flag1 and flag2 are preamble and other flags
 349                 [ucs4,command,flag1,flag2] =line.split(None,3)
 350                 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
 351             except:
 352                 pass
 353     fp.close()
 354     return spec_chars
 355
 356
 357 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
 358     # Define strings to start and end ERT and math insets
 359     ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
 360     ert_outro='\n\\end_layout\n\n\\end_inset\n'
 361     math_intro='\n\\begin_inset Formula $'
 362     math_outro='$\n\\end_inset'
 363
 364     mod_line = u''
 365     if i and document.body[i - 1][:1] != '\\':
 366         last_char = document.body[i - 1][-1:]
 367     else:
 368         last_char = ''
 369
 370     line = document.body[i]
 371     for character in line:
 372         try:
 373             # Try to write the character
 374             dummy = character.encode(document.encoding)
 375             mod_line += character
 376             last_char = character
 377         except:
 378             # Try to replace with ERT/math inset
 379             if spec_chars.has_key(character):
 380                 command = spec_chars[character][0] # the command to replace unicode
 381                 flag1 = spec_chars[character][1]
 382                 flag2 = spec_chars[character][2]
 383                 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
 384                     # We have a character that should be combined with the previous
 385                     command += '{' + last_char + '}'
 386                     # Remove the last character. Ignore if it is whitespace
 387                     if len(last_char.rstrip()):
 388                         # last_char was found and is not whitespace
 389                         if mod_line:
 390                             mod_line = mod_line[:-1]
 391                         else: # last_char belongs to the last line
 392                             document.body[i-1] = document.body[i-1][:-1]
 393                     else:
 394                         # The last character was replaced by a command. For now it is
 395                         # ignored. This could be handled better.
 396                         pass
 397                 if command[0:2] == '\\\\':
 398                     if command[2:12]=='ensuremath':
 399                         if insets and insets[-1] == "ERT":
 400                             # math in ERT
 401                             command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
 402                             command = command.replace('}', '$\n')
 403                         elif not insets or insets[-1] != "Formula":
 404                             # add a math inset with the replacement character
 405                             command = command.replace('\\\\ensuremath{\\', math_intro)
 406                             command = command.replace('}', math_outro)
 407                         else:
 408                             # we are already in a math inset
 409                             command = command.replace('\\\\ensuremath{\\', '')
 410                             command = command.replace('}', '')
 411                     else:
 412                         if insets and insets[-1] == "Formula":
 413                             # avoid putting an ERT in a math; instead put command as text
 414                             command = command.replace('\\\\', '\mathrm{')
 415                             command = command + '}'
 416                         elif not insets or insets[-1] != "ERT":
 417                             # add an ERT inset with the replacement character
 418                             command = command.replace('\\\\', ert_intro)
 419                             command = command + ert_outro
 420                         else:
 421                             command = command.replace('\\\\', '\n\\backslash\n')
 422                     last_char = '' # indicate that the character should not be removed
 423                 mod_line += command
 424             else:
 425                 # Replace with replacement string
 426                 mod_line += replacement_character
 427     return mod_line
 428
 429
 430 def revert_unicode(document):
 431     '''Transform unicode characters that can not be written using the
 432 document encoding to commands according to the unicodesymbols
 433 file. Characters that can not be replaced by commands are replaced by
 434 an replacement string.  Flags other than 'combined' are currently not
 435 implemented.'''
 436     spec_chars = read_unicodesymbols()
 437     insets = [] # list of active insets
 438
 439     # Go through the document to capture all combining characters
 440     i = 0
 441     while i < len(document.body):
 442         line = document.body[i]
 443         # Check for insets
 444         if line.find('\\begin_inset') > -1:
 445             insets.append(line[13:].split()[0])
 446         if line.find('\\end_inset') > -1:
 447             del insets[-1]
 448
 449         # Try to write the line
 450         try:
 451             # If all goes well the line is written here
 452             dummy = line.encode(document.encoding)
 453             i += 1
 454         except:
 455             # Error, some character(s) in the line need to be replaced
 456             mod_line = revert_unicode_line(document, i, insets, spec_chars)
 457             document.body[i:i+1] = mod_line.split('\n')
 458             i += len(mod_line.split('\n'))
 459
 460
 461 def revert_cs_label(document):
 462     " Remove status flag of charstyle label. "
 463     i = 0
 464     while 1:
 465         i = find_token(document.body, "\\begin_inset CharStyle", i)
 466         if i == -1:
 467             return
 468         # Seach for a line starting 'show_label'
 469         # If it is not there, break with a warning message
 470         i = i + 1
 471         while 1:
 472             if (document.body[i][:10] == "show_label"):
 473                 del document.body[i]
 474                 break
 475             elif (document.body[i][:13] == "\\begin_layout"):
 476                 document.warning("Malformed LyX document: Missing 'show_label'.")
 477                 break
 478             i = i + 1
 479
 480         i = i + 1
 481
 482
 483 def convert_bibitem(document):
 484     """ Convert
 485 \bibitem [option]{argument}
 486
 487 to
 488
 489 \begin_inset LatexCommand bibitem
 490 label "option"
 491 key "argument"
 492
 493 \end_inset
 494
 495 This must be called after convert_commandparams.
 496 """
 497     i = 0
 498     while 1:
 499         i = find_token(document.body, "\\bibitem", i)
 500         if i == -1:
 501             break
 502         j = document.body[i].find('[') + 1
 503         k = document.body[i].rfind(']')
 504         if j == 0: # No optional argument found
 505             option = None
 506         else:
 507             option = document.body[i][j:k]
 508         j = document.body[i].rfind('{') + 1
 509         k = document.body[i].rfind('}')
 510         argument = document.body[i][j:k]
 511         lines = ['\\begin_inset LatexCommand bibitem']
 512         if option != None:
 513             lines.append('label "%s"' % option.replace('"', '\\"'))
 514         lines.append('key "%s"' % argument.replace('"', '\\"'))
 515         lines.append('')
 516         lines.append('\\end_inset')
 517         document.body[i:i+1] = lines
 518         i = i + 1
 519
 520
 521 commandparams_info = {
 522     # command : [option1, option2, argument]
 523     "bibitem" : ["label", "", "key"],
 524     "bibtex" : ["options", "btprint", "bibfiles"],
 525     "cite"        : ["after", "before", "key"],
 526     "citet"       : ["after", "before", "key"],
 527     "citep"       : ["after", "before", "key"],
 528     "citealt"     : ["after", "before", "key"],
 529     "citealp"     : ["after", "before", "key"],
 530     "citeauthor"  : ["after", "before", "key"],
 531     "citeyear"    : ["after", "before", "key"],
 532     "citeyearpar" : ["after", "before", "key"],
 533     "citet*"      : ["after", "before", "key"],
 534     "citep*"      : ["after", "before", "key"],
 535     "citealt*"    : ["after", "before", "key"],
 536     "citealp*"    : ["after", "before", "key"],
 537     "citeauthor*" : ["after", "before", "key"],
 538     "Citet"       : ["after", "before", "key"],
 539     "Citep"       : ["after", "before", "key"],
 540     "Citealt"     : ["after", "before", "key"],
 541     "Citealp"     : ["after", "before", "key"],
 542     "Citeauthor"  : ["after", "before", "key"],
 543     "Citet*"      : ["after", "before", "key"],
 544     "Citep*"      : ["after", "before", "key"],
 545     "Citealt*"    : ["after", "before", "key"],
 546     "Citealp*"    : ["after", "before", "key"],
 547     "Citeauthor*" : ["after", "before", "key"],
 548     "citefield"   : ["after", "before", "key"],
 549     "citetitle"   : ["after", "before", "key"],
 550     "cite*"       : ["after", "before", "key"],
 551     "hfill" : ["", "", ""],
 552     "index"      : ["", "", "name"],
 553     "printindex" : ["", "", "name"],
 554     "label" : ["", "", "name"],
 555     "eqref"     : ["name", "", "reference"],
 556     "pageref"   : ["name", "", "reference"],
 557     "prettyref" : ["name", "", "reference"],
 558     "ref"       : ["name", "", "reference"],
 559     "vpageref"  : ["name", "", "reference"],
 560     "vref"      : ["name", "", "reference"],
 561     "tableofcontents" : ["", "", "type"],
 562     "htmlurl" : ["name", "", "target"],
 563     "url"     : ["name", "", "target"]}
 564
 565
def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.
 They are taken from commandparams_info; a parameter whose name is empty
 there is dropped with a warning.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # Everything after "\begin_inset LatexCommand " is the command.
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            # The command may continue on the following lines: join them
            # and remove them from the body.
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        # "WS" is the state between two parts of the command.
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        # Previous character, needed to recognize the "][" that starts the
        # second option.
        b = 0
        for c in command:
            # The character that ends the command name is handled by the
            # "WS" branch below in the same iteration, so it can open an
            # option or the argument.
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            # A closing bracket ends the current part unless it closes a
            # nested bracket.
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            # Collect the character in the part that is currently open, or
            # open a new part.
            if state == "CMDNAME":
                    name += c
            elif state == "OPTION":
                    option1 += c
            elif state == "SECOPTION":
                    option2 += c
            elif state == "CONTENT":
                    argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        # NOTE(review): a name that is not a key of commandparams_info
        # raises KeyError in the lookups below if any parameter is non-empty.
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
        # Only the \begin_inset line is replaced; the \end_inset line stays.
        document.body[i:i+1] = lines
        i = i + 1
 667         document.body[i:i+1] = lines
 668         i = i + 1
 669
 670
 671 def revert_commandparams(document):
 672     regex = re.compile(r'(\S+)\s+(.+)')
 673     i = 0
 674     while 1:
 675         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 676         if i == -1:
 677             break
 678         name = document.body[i].split()[2]
 679         j = find_end_of_inset(document.body, i + 1)
 680         preview_line = ""
 681         option1 = ""
 682         option2 = ""
 683         argument = ""
 684         for k in range(i + 1, j):
 685             match = re.match(regex, document.body[k])
 686             if match:
 687                 pname = match.group(1)
 688                 pvalue = match.group(2)
 689                 if pname == "preview":
 690                     preview_line = document.body[k]
 691                 elif (commandparams_info[name][0] != "" and
 692                       pname == commandparams_info[name][0]):
 693                     option1 = pvalue.strip('"').replace('\\"', '"')
 694                 elif (commandparams_info[name][1] != "" and
 695                       pname == commandparams_info[name][1]):
 696                     option2 = pvalue.strip('"').replace('\\"', '"')
 697                 elif (commandparams_info[name][2] != "" and
 698                       pname == commandparams_info[name][2]):
 699                     argument = pvalue.strip('"').replace('\\"', '"')
 700             elif document.body[k].strip() != "":
 701                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 702         if name == "bibitem":
 703             if option1 == "":
 704                 lines = ["\\bibitem {%s}" % argument]
 705             else:
 706                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 707         else:
 708             if option1 == "":
 709                 if option2 == "":
 710                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 711                 else:
 712                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 713             else:
 714                 if option2 == "":
 715                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 716                 else:
 717                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 718         if name != "bibitem":
 719             if preview_line != "":
 720                 lines.append(preview_line)
 721             lines.append('')
 722             lines.append('\\end_inset')
 723         document.body[i:j+1] = lines
 724         i = j + 1
 725
 726
 727 def revert_nomenclature(document):
 728     " Convert nomenclature entry to ERT. "
 729     regex = re.compile(r'(\S+)\s+(.+)')
 730     i = 0
 731     use_nomencl = 0
 732     while 1:
 733         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 734         if i == -1:
 735             break
 736         use_nomencl = 1
 737         j = find_end_of_inset(document.body, i + 1)
 738         preview_line = ""
 739         symbol = ""
 740         description = ""
 741         prefix = ""
 742         for k in range(i + 1, j):
 743             match = re.match(regex, document.body[k])
 744             if match:
 745                 name = match.group(1)
 746                 value = match.group(2)
 747                 if name == "preview":
 748                     preview_line = document.body[k]
 749                 elif name == "symbol":
 750                     symbol = value.strip('"').replace('\\"', '"')
 751                 elif name == "description":
 752                     description = value.strip('"').replace('\\"', '"')
 753                 elif name == "prefix":
 754                     prefix = value.strip('"').replace('\\"', '"')
 755             elif document.body[k].strip() != "":
 756                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 757         if prefix == "":
 758             command = 'nomenclature{%s}{%s}' % (symbol, description)
 759         else:
 760             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 761         document.body[i:j+1] = ['\\begin_inset ERT',
 762                                 'status collapsed',
 763                                 '',
 764                                 '\\begin_layout %s' % document.default_layout,
 765                                 '',
 766                                 '',
 767                                 '\\backslash',
 768                                 command,
 769                                 '\\end_layout',
 770                                 '',
 771                                 '\\end_inset']
 772         i = i + 11
 773     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 774         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 775         document.preamble.append('\\makenomenclature')
 776
 777
 778 def revert_printnomenclature(document):
 779     " Convert printnomenclature to ERT. "
 780     regex = re.compile(r'(\S+)\s+(.+)')
 781     i = 0
 782     use_nomencl = 0
 783     while 1:
 784         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 785         if i == -1:
 786             break
 787         use_nomencl = 1
 788         j = find_end_of_inset(document.body, i + 1)
 789         preview_line = ""
 790         labelwidth = ""
 791         for k in range(i + 1, j):
 792             match = re.match(regex, document.body[k])
 793             if match:
 794                 name = match.group(1)
 795                 value = match.group(2)
 796                 if name == "preview":
 797                     preview_line = document.body[k]
 798                 elif name == "labelwidth":
 799                     labelwidth = value.strip('"').replace('\\"', '"')
 800             elif document.body[k].strip() != "":
 801                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 802         if labelwidth == "":
 803             command = 'nomenclature{}'
 804         else:
 805             command = 'nomenclature[%s]' % labelwidth
 806         document.body[i:j+1] = ['\\begin_inset ERT',
 807                                 'status collapsed',
 808                                 '',
 809                                 '\\begin_layout %s' % document.default_layout,
 810                                 '',
 811                                 '',
 812                                 '\\backslash',
 813                                 command,
 814                                 '\\end_layout',
 815                                 '',
 816                                 '\\end_inset']
 817         i = i + 11
 818     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 819         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 820         document.preamble.append('\\makenomenclature')
 821
 822
 823 def convert_esint(document):
 824     " Add \\use_esint setting to header. "
 825     i = find_token(document.header, "\\cite_engine", 0)
 826     if i == -1:
 827         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 828         return
 829     # 0 is off, 1 is auto, 2 is on.
 830     document.header.insert(i, '\\use_esint 0')
 831
 832
 833 def revert_esint(document):
 834     " Remove \\use_esint setting from header. "
 835     i = find_token(document.header, "\\use_esint", 0)
 836     if i == -1:
 837         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 838         return
 839     use_esint = document.header[i].split()[1]
 840     del document.header[i]
 841     # 0 is off, 1 is auto, 2 is on.
 842     if (use_esint == 2):
 843         document.preamble.append('\\usepackage{esint}')
 844
 845
 846 def revert_clearpage(document):
 847     " clearpage -> ERT "
 848     i = 0
 849     while 1:
 850         i = find_token(document.body, "\\clearpage", i)
 851         if i == -1:
 852             break
 853         document.body[i:i+1] =  ['\\begin_inset ERT',
 854                                 'status collapsed',
 855                                 '',
 856                                 '\\begin_layout %s' % document.default_layout,
 857                                 '',
 858                                 '',
 859                                 '\\backslash',
 860                                 'clearpage',
 861                                 '\\end_layout',
 862                                 '',
 863                                 '\\end_inset']
 864     i = i + 1
 865
 866
 867 def revert_cleardoublepage(document):
 868     " cleardoublepage -> ERT "
 869     i = 0
 870     while 1:
 871         i = find_token(document.body, "\\cleardoublepage", i)
 872         if i == -1:
 873             break
 874         document.body[i:i+1] =  ['\\begin_inset ERT',
 875                                 'status collapsed',
 876                                 '',
 877                                 '\\begin_layout %s' % document.default_layout,
 878                                 '',
 879                                 '',
 880                                 '\\backslash',
 881                                 'cleardoublepage',
 882                                 '\\end_layout',
 883                                 '',
 884                                 '\\end_inset']
 885     i = i + 1
 886
 887
 888 def convert_lyxline(document):
 889     " remove fontsize commands for \lyxline "
 890     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 891     # to change the line thickness. The new definiton does this so that imported
 892     # \lyxlines would have a different line thickness. The eventual fontsize command
 893     # before \lyxline is therefore removed to get the same output.
 894     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 895                  "large", "Large", "LARGE", "huge", "Huge"]
 896     for n in range(0, len(fontsizes)):
 897         i = 0
 898         k = 0
 899         while i < len(document.body):
 900             i = find_token(document.body, "\\size " + fontsizes[n], i)
 901             k = find_token(document.body, "\\lyxline", i)
 902             # the corresponding fontsize command is always 2 lines before the \lyxline
 903             if (i != -1 and k == i+2):
 904                 document.body[i:i+1] = []
 905             else:
 906                 break
 907         i = i + 1
 908
 909
 910 def revert_encodings(document):
 911     " Set new encodings to auto. "
 912     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 913                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 914                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 915     i = find_token(document.header, "\\inputencoding", 0)
 916     if i == -1:
 917         document.header.append("\\inputencoding auto")
 918     else:
 919         inputenc = get_value(document.header, "\\inputencoding", i)
 920         if inputenc in encodings:
 921             document.header[i] = "\\inputencoding auto"
 922     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 923
 924
 925 def convert_caption(document):
 926     " Convert caption layouts to caption insets. "
 927     i = 0
 928     while 1:
 929         i = find_token(document.body, "\\begin_layout Caption", i)
 930         if i == -1:
 931             return
 932         j = find_end_of_layout(document.body, i)
 933         if j == -1:
 934             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 935             return
 936
 937         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 938         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 939                             "\\begin_inset Caption", "",
 940                             "\\begin_layout %s" % document.default_layout]
 941         i = i + 1
 942
 943
def revert_caption(document):
    """ Convert caption insets to caption layouts.

    Each `\\begin_inset Caption' is turned into a `\\begin_layout Caption'
    paragraph.  Depending on whether there is non-empty content between the
    inset and the surrounding \\begin_layout / \\end_layout lines, the
    surrounding paragraph is either removed or split around the caption.
    The conversion stops with a warning if an expected \\begin_layout,
    \\end_inset or \\end_layout line is not found. """
    # NOTE: the following bare string is a separate statement, not a part
    # of the docstring.
    " This assumes that the text class has a caption style. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the layout line: it is needed again if the paragraph has
        # to be restarted behind the caption.
        layout_line = document.body[layout_before]
        del_layout_before = True
        # Look for a non-empty line between the \begin_layout line and the inset
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Nothing precedes the inset: drop the \begin_layout line and the
            # empty lines, so that the inset line is now at layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # Close the paragraph in front of the inset; the inset line is
            # shifted by the two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        del_layout_after = True
        # Look for a non-empty line between the \end_inset line and \end_layout
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions are done from back (k) to front (j, then i), so that
        # the smaller indices are still valid when they are used.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1016
1017
1018 # Accents of InsetLaTeXAccent
1019 accent_map = {
1020     "`" : u'\u0300', # grave
1021     "'" : u'\u0301', # acute
1022     "^" : u'\u0302', # circumflex
1023     "~" : u'\u0303', # tilde
1024     "=" : u'\u0304', # macron
1025     "u" : u'\u0306', # breve
1026     "." : u'\u0307', # dot above
1027     "\"": u'\u0308', # diaeresis
1028     "r" : u'\u030a', # ring above
1029     "H" : u'\u030b', # double acute
1030     "v" : u'\u030c', # caron
1031     "b" : u'\u0320', # minus sign below
1032     "d" : u'\u0323', # dot below
1033     "c" : u'\u0327', # cedilla
1034     "k" : u'\u0328', # ogonek
1035     "t" : u'\u0361'  # tie. This is special: It spans two characters, but
1036                      # only one is given as argument, so we don't need to
1037                      # treat it differently.
1038 }
1039
1040
1041 # special accents of InsetLaTeXAccent without argument
1042 special_accent_map = {
1043     'i' : u'\u0131', # dotless i
1044     'j' : u'\u0237', # dotless j
1045     'l' : u'\u0142', # l with stroke
1046     'L' : u'\u0141'  # L with stroke
1047 }
1048
1049
1050 # special accent arguments of InsetLaTeXAccent
1051 accented_map = {
1052     '\\i' : u'\u0131', # dotless i
1053     '\\j' : u'\u0237'  # dotless j
1054 }
1055
1056
1057 def _convert_accent(accent, accented_char):
1058     type = accent
1059     char = accented_char
1060     if char == '':
1061         if type in special_accent_map:
1062             return special_accent_map[type]
1063         # a missing char is treated as space by LyX
1064         char = ' '
1065     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
1066         # Special caron, only used with t, d, l and L.
1067         # It is not in the map because we convert it to the same unicode
1068         # character as the normal caron: \q{} is only defined if babel with
1069         # the czech or slovak language is used, and the normal caron
1070         # produces the correct output if the T1 font encoding is used.
1071         # For the same reason we never convert to \q{} in the other direction.
1072         type = 'v'
1073     elif char in accented_map:
1074         char = accented_map[char]
1075     elif (len(char) > 1):
1076         # We can only convert accents on a single char
1077         return ''
1078     a = accent_map.get(type)
1079     if a:
1080         return unicodedata.normalize("NFC", "%s%s" % (char, a))
1081     return ''
1082
1083
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append the text `ert' as valid ERT code to body[i].

    Every backslash is written as `\backslash ' and is followed by a line
    break in body; every '\n' ends the current paragraph and starts a new
    one with the layout default_layout.  All other characters are copied.
    Returns the (maybe incremented) index of the line that received the
    last piece of text."""

    for ch in ert:
        if ch == '\n':
            # Finish this paragraph and open a new one behind line i
            paragraph_break = ['\\end_layout', '',
                               '\\begin_layout %s' % default_layout, '']
            body[i+1:i+1] = paragraph_break
            i += 4
            continue
        if ch != '\\':
            body[i] += ch
            continue
        # \backslash must be the last thing in its line
        body[i] += '\\backslash '
        i += 1
        body.insert(i, '')
    return i
1100
1101
def convert_accent(document):
    """Convert InsetLaTeXAccent insets to unicode characters.

    The accent together with the accented character is replaced by the
    string returned by _convert_accent().  Accents that cannot be converted
    are put into a collapsed ERT inset, and a warning is issued."""
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a'  (also accepted)
    # '\i \"'    (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != '':
                    document.body[i] = '%s%s' % (prefix, converted)
                    i += 1
                    continue
                # Normalize contents
                contents = '%s{%s}' % (accent, accented_char)
            else:
                # The argument is none of the supported forms (e.g. nested
                # braces or text behind the closing brace). Output it as it
                # is; the backslash of the accent is added again below.
                contents = '%s%s' % (accent, accented_contents)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        # i + 7 is the last (empty) line inserted above; the ERT text is
        # appended to it.
        i = convert_ertbackslash(document.body, i + 7,
                                 '\\%s' % contents,
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i += 3
1151
1152
def revert_accent(document):
    """Convert accented unicode characters to InsetLaTeXAccent.

    The body is normalized to NFD, then every special character of
    special_accent_map and every character that is followed by a combining
    accent of accent_map is replaced by an `\\i \\<accent>{<char>}' inset,
    unless it can be encoded in the encoding that is in effect at that
    place.  Finally the body is normalized to NFC again."""
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            continue
        if (document.body[i+1][0] in inverse_accent_map and document.body[i][:1] != '\\'):
            # the last character of this line and the first of the next line
            # probably form a combining sequence, inline insets are excluded
            # (second part of the test)
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")

    i = 0
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line.
            # The line index must be advanced before each `continue' below,
            # otherwise the same line would be examined forever.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
                i = i + 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                # A nested paragraph starts with the encoding of its parent
                encoding_stack.append(encoding_stack[-1])
                i = i + 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i = i + 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                try:
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The rest of the line is handled as line i + 1
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    accented_char = ''
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    # The rest of the line is handled as line i + 1
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
1257
1258
def normalize_font_whitespace_259(document):
    """ Font changes were ignored before format 259 if the first or the
    last character of the changed sequence was a whitespace. This function
    moves such a whitespace outside of the font change for the series,
    emph, color, shape, bar and family properties."""

    # property -> value that resets the property
    defaults = dict([("\\series", "default"),
                     ("\\emph", "default"),
                     ("\\color", "none"),
                     ("\\shape", "default"),
                     ("\\bar", "default"),
                     ("\\family", "default")])
    return normalize_font_whitespace(document, defaults)
1271
def normalize_font_whitespace_274(document):
    """ Font changes were ignored before format 259 (sic) if the first or
    the last character of the changed sequence was a whitespace. Format 259
    corrected this for most font properties, but forgot the language.
    This function does for font language changes what was done there:
    the whitespace is moved outside of the change."""

    # property -> value that resets the property
    language_only = {}
    language_only["\\lang"] = "default"
    return normalize_font_whitespace(document, language_only)
1282
def get_paragraph_language(document, i):
    """ Return the language of the paragraph that contains line i of the
    document body. This is the argument of the \\lang command if the first
    nonempty line of the paragraph is such a command, and the language of
    the document otherwise."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first = find_nonempty_line(body, layout_start + 1)
    tokens = body[first].split()

    if len(tokens) <= 1 or tokens[0] != "\\lang":
        return document.language
    return tokens[1]
1300
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function.

    char_properties maps a property command (e.g. `\\emph') to the value
    that resets it. If it has a `\\lang' entry, that entry is overwritten
    with the language of every paragraph that is visited. Nothing is done
    unless the backend of the document is latex."""

    if document.backend != "latex":
        return

    lines = document.body

    # Properties that currently differ from their reset value in this
    # paragraph (property -> value)
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            # NOTE(review): lines[i-1], lines[i+1] and lines[i+2] are accessed
            # without a bounds check. This presumably relies on a font change
            # being neither the first nor one of the last lines of the
            # body -- confirm.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # added_lines is built around the space: the resets of the
                # other changed properties go in front of it, their
                # original values behind it.
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                # skip the inserted lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = lines[i+1][1:]
                # Same construction as above: resets, the space, then the
                # original values of the other changed properties
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                # skip the inserted lines
                i = i + len(added_lines)

        i = i + 1
1388
1389         i = i + 1
1390
1391
def revert_utf8x(document):
    r"""Replace the input encoding utf8x by utf8.

    If the header has no \inputencoding line at all, one with the value
    "auto" is appended.  In every case document.inputencoding is re-read
    from the header afterwards.
    """
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        # only utf8x is touched, every other encoding is left alone
        if get_value(document.header, "\\inputencoding", pos) == "utf8x":
            document.header[pos] = "\\inputencoding utf8"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1402
1403
def revert_utf8plain(document):
    r"""Replace the input encoding utf8-plain by utf8.

    A missing \inputencoding header line is created with the value "auto".
    document.inputencoding is refreshed from the header at the end.
    """
    encoding_line = find_token(document.header, "\\inputencoding", 0)
    if encoding_line == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", encoding_line) == "utf8-plain":
        document.header[encoding_line] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1414
1415
def revert_beamer_alert(document):
    r"""Revert beamer's \alert inset back to ERT.

    Every "\begin_inset CharStyle Alert" line becomes "\begin_inset ERT" and
    the line following the next \begin_layout is wrapped in \alert{...}.
    If no such layout line exists, a warning is issued and the conversion
    stops instead of running past the end of the body.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # Look for the first layout after the inset header, but never beyond
        # the end of the document
        while i < len(document.body) and document.body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(document.body):
            document.warning("Malformed LyX document: Could not find layout of Alert inset.")
            return
        # Insert the \alert command
        document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'

        i = i + 1
1433
1434
def revert_beamer_structure(document):
    r"""Revert beamer's \structure inset back to ERT.

    Every "\begin_inset CharStyle Structure" line becomes "\begin_inset ERT"
    and the line following the next \begin_layout is wrapped in
    \structure{...}.  If no such layout line exists, a warning is issued and
    the conversion stops instead of running past the end of the body.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # Look for the first layout after the inset header, but never beyond
        # the end of the document
        while i < len(document.body) and document.body[i][:13] != "\\begin_layout":
            i = i + 1
        if i + 1 >= len(document.body):
            document.warning("Malformed LyX document: Could not find layout of Structure inset.")
            return
        # Insert the \structure command
        document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'

        i = i + 1
1451
1452
def convert_changes(document):
    r"""Switch \output_changes off if \tracking_changes is off.

    A warning is issued, and nothing is changed, when either of the two
    header settings is missing.
    """
    track_line = find_token(document.header, '\\tracking_changes', 0)
    if track_line == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_line = find_token(document.header, '\\output_changes', 0)
    if output_line == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_off = get_value(document.header, "\\tracking_changes", track_line) == "false"
    output_on = get_value(document.header, "\\output_changes", output_line) == "true"
    if tracking_off and output_on:
        document.header[output_line] = "\\output_changes false"
1467
1468
def revert_ascii(document):
    r"""Replace the input encoding ascii by auto.

    A missing \inputencoding header line is created with the value "auto".
    document.inputencoding is refreshed from the header at the end.
    """
    where = find_token(document.header, "\\inputencoding", 0)
    if where != -1:
        current = get_value(document.header, "\\inputencoding", where)
        if current == "ascii":
            document.header[where] = "\\inputencoding auto"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1479
1480
def normalize_language_name(document):
    r"""Rename the languages brazil and portuges.

    "brazil" becomes "brazilian" and "portuges" becomes "portuguese", both in
    document.language and in the \language header line.  Documents in any
    other language are left untouched.
    """
    lang = { "brazil": "brazilian",
             "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # A missing header line yields i == -1; writing to header[-1] would
        # silently overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
1489
1490
def revert_language_name(document):
    r"""Rename the languages brazilian and portuguese back.

    "brazilian" becomes "brazil" and "portuguese" becomes "portuges", both in
    document.language and in the \language header line.  Documents in any
    other language are left untouched.
    """
    lang = { "brazilian": "brazil",
             "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # A missing header line yields i == -1; writing to header[-1] would
        # silently overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
1499
1500 #
1501 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " Rename the textclass cv to simplecv; other classes are left alone. "
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1505
1506
def revert_cv_textclass(document):
    " Rename the textclass simplecv back to cv; other classes are left alone. "
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1510
1511
1512 #
1513 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation graphics parameter.

    The parameter is added to every Graphics inset that has a rotateAngle
    parameter together with a width, height or scale parameter.  All other
    Graphics insets are left untouched.  An inset without an end is reported
    with a warning and skipped.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # there is no range to search or to insert into
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        angle = find_token(document.body, "\trotateAngle", i + 1, j)
        size = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if angle != -1 and size != -1:
            # Graphics parameters start with a tab, and that is also what
            # revert_graphics_rotation searches for.
            document.body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
1532
1533
1534 #
1535 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation graphics parameter.

    If a Graphics inset has the parameter, it is simply removed.  Otherwise,
    if the inset has a rotateAngle together with width, height or scale, the
    angle is moved into the special parameter ("angle=...") and the
    rotateAngle line is deleted.  All Graphics insets of the document are
    processed; an inset that needs no change does not stop the conversion.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
        else:
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
            special = get_value(document.body, 'special', i + 1, j)
            if rotateAngle != "":
                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
                # Without a size parameter this inset needs no change, but
                # the remaining insets must still be processed.
                if k != -1:
                    if special == "":
                        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                        # the inset grew by one line
                        j = j + 1
                    else:
                        special_line = find_token(document.body, "\tspecial", i + 1, j)
                        document.body[special_line] = document.body[special_line].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                    k = find_token(document.body, "\trotateAngle", i + 1, j)
                    if k != -1:
                        del document.body[k]
        i = i + 1
1569
1570
1571
def convert_tableborder(document):
    """Remove the "|" in front of ">{" from table lines with a left border.

    The problem is: LyX doubles the table cell border as it ignores the "|"
    character in the cell arguments.  A fix takes care of this and therefore
    the "|" has to be removed.  Only lines that contain both leftline="true"
    and "|>{" are changed, and only the "|" itself is deleted.
    """
    for i, line in enumerate(document.body):
        # the two tokens have to be in one line
        if 'leftline="true"' not in line:
            continue
        k = line.find("|>{")
        if k != -1:
            # delete the "|" and keep the rest of the line, including its
            # last character
            document.body[i] = line[:k] + line[k + 1:]
1584
1585
def revert_tableborder(document):
    """Put a "|" in front of the first ">{" of table lines with a left border.

    Only lines that contain both leftline="true" and ">{" are changed.
    """
    for index, text in enumerate(document.body):
        # the two tokens have to be in one line
        if 'leftline="true"' not in text:
            continue
        cut = text.find(">{")
        if cut == -1:
            continue
        # add the "|"
        document.body[index] = '%s|%s' % (text[:cut], text[cut:])
1596
1597
def revert_armenian(document):
    r"""Revert the armscii8 encoding and the language armenian.

    An armscii8 input encoding is replaced by auto in the header.  For
    armenian documents \usepackage{armtex} is added to the preamble and the
    language is set to english.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_line = find_token(document.header, "\\inputencoding", 0)
        if enc_line != -1:
            document.header[enc_line] = "\\inputencoding auto"

    if document.language != "armenian":
        return

    # A preamble counts as existing if any of its lines contains a
    # backslash or a percent sign
    preamble_exists = False
    for entry in document.preamble:
        if "\\" in entry or "%" in entry:
            preamble_exists = True
            break
    if preamble_exists:
        # set the armtex entry as the first preamble line
        document.preamble[0:0] = ["\\usepackage{armtex}"]
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    lang_line = find_token(document.header, "\\language", 0)
    if lang_line != -1:
        document.header[lang_line] = "\\language english"
1628
1629
def revert_CJK(document):
    r"""Set CJK encodings to default and CJK languages to english.

    A missing \inputencoding header line is created with the value "auto".
    The languages chinese-simplified, chinese-traditional, japanese and
    korean are replaced by english.
    """
    cjk_encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")

    enc_line = find_token(document.header, "\\inputencoding", 0)
    if enc_line != -1:
        if get_value(document.header, "\\inputencoding", enc_line) in cjk_encodings:
            document.header[enc_line] = "\\inputencoding default"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language not in cjk_languages:
        return
    document.language = "english"
    lang_line = find_token(document.header, "\\language", 0)
    if lang_line != -1:
        document.header[lang_line] = "\\language english"
1650
1651
def revert_preamble_listings_params(document):
    r"""Revert the header option \listings_params to preamble code.

    The header line is removed.  The listings package is loaded in the
    preamble (unless that exact line is already there) and the parameters
    are passed on with \lstset{...}.  Nothing happens if the header has no
    \listings_params line.
    """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Use everything after the token, so that parameters containing blanks
    # are not cut off at the first blank
    words = document.header[i].split(None, 1)
    params = ''
    if len(words) > 1:
        params = words[1].strip().strip('"')
    if '\\usepackage{listings}' not in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1659
1660
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset listings
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

There can be a caption inset in this inset

\begin_layout Standard
\begin_inset Caption

\begin_layout Standard
before label
\begin_inset LatexCommand label
name "lst:caption"

\end_inset

after label
\end_layout

\end_inset


\end_layout

The caption text and the label name are passed on as the caption= and
label= listing parameters.  Only a caption inside the listings inset and a
label inside that caption are taken into account.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # k is the first content line.  It starts right after the inset
        # line and is moved behind every header line found below, so it is
        # always defined.
        k = i + 1
        # first three lines, but never beyond the end of the inset
        for line in range(i + 1, min(i + 4, j)):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
                k = line + 1
            if document.body[line].startswith('lstparams'):
                # keep the complete value, it may contain blanks
                params = document.body[line][len('lstparams'):].strip().strip('"')
                k = line + 1
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                k = line + 1
        # caption?
        caption = ''
        label = ''
        # search inside this inset only, not in the rest of the document
        cap = find_token(document.body, '\\begin_inset Caption', i, j)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?  (inside the caption only)
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1, cap_end)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            # the caption text is everything around the label inset that is
            # not a LyX command
            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            k = cap_end + 1
        # looking for the oneline code for lstinline
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout',
                                      '',
                                      r'\begin_layout %s' % document.default_layout,
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout %s' % document.default_layout,
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1810
1811
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting[opt]{file}
\end_layout

\end_inset

The file name may contain any character except "}".
    '''

    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]
        cmd, filename, option = '', '', ''
        # Everything after "\begin_inset Include"; the remainder is kept in
        # one piece so that options containing blanks are not cut off
        words = document.body[i].split(None, 2)
        if len(words) > 2:
            match = re.match(r'\\(lstinputlisting)\{([^}]*)\}(.*)', words[2].rstrip())
            if match:
                cmd, filename, option = match.groups()
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout %s' % document.default_layout,
                                    '',
                                    '',
                                    r'\backslash',
                                    '%s%s{%s}' % (cmd, option, filename),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1862
1863
def revert_ext_font_sizes(document):
    r"""Move the font sizes 10, 11 and 12 of ext* classes to the class options.

    Only documents with the latex backend and a textclass starting with
    "ext" are changed.  \paperfontsize is set to default and the size is
    appended (as 10pt, 11pt or 12pt) to \options; a missing \options line is
    created after the \textclass line.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, '\\paperfontsize', 0)
    if size not in ('10', '11', '12'):
        return
    size_option = size + 'pt'

    size_line = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_line] = '\\paperfontsize default'

    options_line = find_token(document.header, '\\options', 0)
    if options_line != -1:
        document.header[options_line] = document.header[options_line] + ',%s' % size_option
    else:
        options_line = find_token(document.header, '\\textclass', 0) + 1
        document.header[options_line:options_line] = ['\\options %s' % size_option]
1881
1882
def convert_ext_font_sizes(document):
    r"""Move a 10pt, 11pt or 12pt class option of ext* classes to \paperfontsize.

    Only documents with the latex backend, a textclass starting with "ext"
    and a default \paperfontsize are changed.  The first class option that
    is exactly 10pt, 11pt or 12pt is removed from \options and becomes the
    \paperfontsize (without the "pt"); an \options line left empty is deleted.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return
    if get_value(document.header, '\\paperfontsize', 0) != 'default':
        return

    options_line = find_token(document.header, '\\options', 0)
    if options_line == -1:
        return

    known_sizes = ('10pt', '11pt', '12pt')
    class_options = get_value(document.header, '\\options', options_line).split(',')
    # positions of all options that are exactly a font size
    hits = [pos for pos, opt in enumerate(class_options) if opt in known_sizes]
    if not hits:
        return
    size_option = class_options.pop(hits[0])

    size_line = find_token(document.header, '\\paperfontsize', 0)
    document.header[size_line] = '\\paperfontsize %s' % size_option[:-2]

    if class_options:
        document.header[options_line] = '\\options %s' % ','.join(class_options)
    else:
        del document.header[options_line]
1918
1919
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
From

\begin_layout --Separator--
something
\end_layout

to

\begin_layout Standard
\begin_inset Note Note
status open

\begin_layout Standard
Separate Environment
\end_layout

\end_inset
something

\end_layout

    '''

    start = 0
    while True:
        start = find_token(document.body, r'\begin_layout --Separator--', start)
        if start == -1:
            break
        end = find_end_of_layout(document.body, start + 1)
        if end == -1:
            # this should not happen
            break
        # the note that takes the place of the separator layout
        note = [r'\begin_layout %s' % document.default_layout,
                r'\begin_inset Note Note',
                'status open',
                '',
                r'\begin_layout %s' % document.default_layout,
                'Separate Environment',
                r'\end_layout',
                '',
                r'\end_inset']
        # the former content of the separator layout follows the note
        content = document.body[start + 1 : end]
        document.body[start : end + 1] = note + content + ['', r'\end_layout']
1967
1968
def convert_arabic(document):
    r"""Rename the language arabic to arabic_arabtex.

    If the document language is arabic, document.language and the \language
    header line are updated.  In addition every body line that contains
    "\lang arabic" is replaced by "\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    for i, line in enumerate(document.body):
        # the backslash is escaped: "\l" is not a valid escape sequence
        if "\\lang arabic" in line:
            # change the language name
            document.body[i] = "\\lang arabic_arabtex"
1982
1983
def revert_arabic(document):
    r"""Rename the language arabic_arabtex back to arabic.

    If the document language is arabic_arabtex, document.language and the
    \language header line are updated.  In addition every body line that
    contains "\lang arabic_arabtex" is replaced by "\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    for i, line in enumerate(document.body):
        # the backslash is escaped: "\l" is not a valid escape sequence
        if "\\lang arabic_arabtex" in line:
            # change the language name
            document.body[i] = "\\lang arabic"
1997
1998
1999 ##
2000 # Conversion hub
2001 #
2002
# Version strings of the LyX release this module belongs to (presumably
# matched by the lyx2lyx driver against the requested target version --
# confirm against the caller).
supported_versions = ["1.5.0","1.5"]
# Conversion table: each entry is [file format number, [functions]].  The
# formats are listed in ascending order; an empty function list means that
# no function of this module is registered for that format.
# NOTE(review): presumably the driver calls the listed functions, in order,
# to bring a document up to the given format -- confirm against the caller.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
          ]

# Reversion table: same layout as the conversion table, with the formats in
# descending order.
# NOTE(review): presumably the driver calls the listed functions, in order,
# to bring a document down to the given format -- confirm against the caller.
# The three listings functions are registered twice (271 and 268).
revert =  [
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]


if __name__ == "__main__":
    # Running this module as a script does nothing.
    pass