1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the `\\end_inset` that closes the inset
    containing lines[i] (delegates to the generic find_end_of)."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    """Return the index of the `\\end_layout` that closes the layout
    containing lines[i] (delegates to the generic find_end_of)."""
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def find_beginning_of_layout(lines, i):
    """Return the index of the `\\begin_layout` that opens the layout
    containing lines[i] (delegates to the generic find_beginning_of)."""
    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# bounds check, index increment); comments below cover only the visible code.
53 def revert_framed(document):
54 "Revert framed notes. "
# Locate the next Framed/Shaded note inset ...
57 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
# ... and downgrade it to a plain Note inset.
61 document.body[i] = "\\begin_inset Note"
# Maps a LyX 1.4 \fontscheme value to the LyX 1.5 \font_roman value
# (used by convert_font_settings / revert_font_settings below).
# NOTE(review): the tail of this dict literal (closing brace and, presumably,
# a 'pslatex' entry) is elided in this excerpt.
69 roman_fonts = {'default' : 'default', 'ae' : 'ae',
70 'times' : 'times', 'palatino' : 'palatino',
71 'helvet' : 'default', 'avant' : 'default',
72 'newcent' : 'newcent', 'bookman' : 'bookman',
# Maps a LyX 1.4 \fontscheme value to the LyX 1.5 \font_sans value.
# NOTE(review): the tail of this dict literal (closing brace and, presumably,
# a 'pslatex' entry) is elided in this excerpt.
74 sans_fonts = {'default' : 'default', 'ae' : 'default',
75 'times' : 'default', 'palatino' : 'default',
76 'helvet' : 'helvet', 'avant' : 'avant',
77 'newcent' : 'default', 'bookman' : 'default',
# Maps a LyX 1.4 \fontscheme value to the LyX 1.5 \font_typewriter value.
# Only 'pslatex' selects a real typewriter face (courier); every other
# scheme falls back to the default typewriter font.
typewriter_fonts = dict.fromkeys(
    ('default', 'ae', 'times', 'palatino',
     'helvet', 'avant', 'newcent', 'bookman'), 'default')
typewriter_fonts['pslatex'] = 'courier'
# NOTE(review): some original lines are elided in this excerpt (the `i = 0`
# setup and the if/else branches around the warnings); comments cover
# visible code only.
85 def convert_font_settings(document):
86 " Convert font settings. "
# Replace the single 1.4 `\fontscheme` header line with the per-family
# 1.5 `\font_*` settings derived from the tables above.
88 i = find_token_exact(document.header, "\\fontscheme", i)
90 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
92 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
94 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
95 font_scheme = 'default'
# Unknown schemes degrade gracefully to 'default' with a warning.
96 if not font_scheme in roman_fonts.keys():
97 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
98 font_scheme = 'default'
99 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
100 '\\font_sans %s' % sans_fonts[font_scheme],
101 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
102 '\\font_default_family default',
105 '\\font_sf_scale 100',
106 '\\font_tt_scale 100']
# NOTE(review): some original lines are elided in this excerpt (the
# `insert_line`/`i` setup and several if/else lines); comments cover
# visible code only.
109 def revert_font_settings(document):
110 " Revert font settings. "
# Collect the 1.5 per-family font settings, removing them from the header
# as they are read; afterwards a matching 1.4 \fontscheme is re-inserted.
113 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
114 for family in 'roman', 'sans', 'typewriter':
115 name = '\\font_%s' % family
116 i = find_token_exact(document.header, name, i)
118 document.warning("Malformed LyX document: Missing `%s'." % name)
121 if (insert_line < 0):
123 fonts[family] = get_value(document.header, name, i, i + 1)
124 del document.header[i]
125 i = find_token_exact(document.header, '\\font_default_family', i)
127 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
128 font_default_family = 'default'
130 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
131 del document.header[i]
132 i = find_token_exact(document.header, '\\font_sc', i)
134 document.warning("Malformed LyX document: Missing `\\font_sc'.")
137 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
138 del document.header[i]
# Settings with no 1.4 equivalent are only warned about, not converted.
139 if font_sc != 'false':
140 document.warning("Conversion of '\\font_sc' not yet implemented.")
141 i = find_token_exact(document.header, '\\font_osf', i)
143 document.warning("Malformed LyX document: Missing `\\font_osf'.")
146 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
147 del document.header[i]
148 i = find_token_exact(document.header, '\\font_sf_scale', i)
150 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
151 font_sf_scale = '100'
153 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
154 del document.header[i]
155 if font_sf_scale != '100':
156 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
157 i = find_token_exact(document.header, '\\font_tt_scale', i)
159 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
160 font_tt_scale = '100'
162 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
163 del document.header[i]
164 if font_tt_scale != '100':
165 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# If some \fontscheme maps exactly to the collected roman/sans/typewriter
# triple, re-insert it ...
166 for font_scheme in roman_fonts.keys():
167 if (roman_fonts[font_scheme] == fonts['roman'] and
168 sans_fonts[font_scheme] == fonts['sans'] and
169 typewriter_fonts[font_scheme] == fonts['typewriter']):
170 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
171 if font_default_family != 'default':
172 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
173 if font_osf == 'true':
174 document.warning("Ignoring `\\font_osf = true'")
# ... otherwise fall back to \fontscheme default and emulate the selected
# fonts with raw preamble code.
176 font_scheme = 'default'
177 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
178 if fonts['roman'] == 'cmr':
179 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
180 if font_osf == 'true':
181 document.preamble.append('\\usepackage{eco}')
183 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
184 if fonts['roman'] == font:
185 document.preamble.append('\\usepackage{%s}' % font)
186 for font in 'cmss', 'lmss', 'cmbr':
187 if fonts['sans'] == font:
188 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
# NOTE(review): this iterates over the *characters* of the string
# 'berasans', not a one-element tuple ('berasans',). No single character
# can equal fonts['sans'] == 'berasans', so the \usepackage{berasans}
# branch below looks unreachable — verify against upstream.
189 for font in 'berasans':
190 if fonts['sans'] == font:
191 document.preamble.append('\\usepackage{%s}' % font)
192 for font in 'cmtt', 'lmtt', 'cmtl':
193 if fonts['typewriter'] == font:
194 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
195 for font in 'courier', 'beramono', 'luximono':
196 if fonts['typewriter'] == font:
197 document.preamble.append('\\usepackage{%s}' % font)
198 if font_default_family != 'default':
199 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
200 if font_osf == 'true':
201 document.warning("Ignoring `\\font_osf = true'")
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# -1 checks, index increments); comments cover visible code only.
204 def revert_booktabs(document):
205 " We remove the booktabs flag or everything else will become a mess. "
# Pre-compiled patterns for rows carrying extra (booktabs-only) spacing.
206 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
207 re_tspace = re.compile(r'\s+topspace="[^"]+"')
208 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
209 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
212 i = find_token(document.body, "\\begin_inset Tabular", i)
215 j = find_end_of_inset(document.body, i + 1)
217 document.warning("Malformed LyX document: Could not find end of tabular.")
# Strip the booktabs attribute and any extra row spacing from every line
# of the tabular inset.
219 for k in range(i, j):
220 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
221 document.warning("Converting 'booktabs' table to normal table.")
222 document.body[k] = document.body[k].replace(' booktabs="true"', '')
223 if re.search(re_row, document.body[k]):
224 document.warning("Removing extra row space.")
225 document.body[k] = re_tspace.sub('', document.body[k])
226 document.body[k] = re_bspace.sub('', document.body[k])
227 document.body[k] = re_ispace.sub('', document.body[k])
# NOTE(review): a number of original lines are elided in this excerpt
# (docstring fragments, `insets`/`i` setup, several if/else and increment
# lines); comments cover visible code only.
231 def convert_multiencoding(document, forward):
232 """ Fix files with multiple encodings.
233 Files with an inputencoding of "auto" or "default" and multiple languages
234 where at least two languages have different default encodings are encoded
235 in multiple encodings for file formats < 249. These files are incorrectly
236 read and written (as if the whole file was in the encoding of the main
238 This is not true for files written by CJK-LyX, they are always in the locale
242 - converts from fake unicode values to true unicode if forward is true, and
243 - converts from true unicode values to fake unicode if forward is false.
244 document.encoding must be set to the old value (format 248) in both cases.
246 We do this here and not in LyX.py because it is far easier to do the
247 necessary parsing in modern formats than in ancient ones.
# Insets whose content is encoded in the document language rather than the
# surrounding language.
249 inset_types = ["Foot", "Note"]
# CJK-LyX files are single-encoding; nothing to fix for them.
250 if document.cjk_encoding != '':
# Stack of encodings active at each nesting level; starts with the
# document encoding.
252 encoding_stack = [document.encoding]
254 lang_re = re.compile(r"^\\lang\s(\S+)")
255 inset_re = re.compile(r"^\\begin_inset\s(\S+)")
256 if not forward: # no need to read file unless we are reverting
257 spec_chars = read_unicodesymbols()
259 if document.inputencoding == "auto" or document.inputencoding == "default":
261 while i < len(document.body):
# A \lang switch changes the encoding of the innermost level.
262 result = lang_re.match(document.body[i])
264 language = result.group(1)
265 if language == "default":
266 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
267 encoding_stack[-1] = document.encoding
269 from lyx2lyx_lang import lang
270 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
271 encoding_stack[-1] = lang[language][3]
272 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
273 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
# Footnotes/notes restart in the default encoding of the document
# language; other layouts inherit the current encoding.
274 if len(insets) > 0 and insets[-1] in inset_types:
275 from lyx2lyx_lang import lang
276 encoding_stack.append(lang[document.language][3])
278 encoding_stack.append(encoding_stack[-1])
279 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
280 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
281 if len(encoding_stack) == 1:
282 # Don't remove the document encoding from the stack
283 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
285 del encoding_stack[-1]
286 elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
287 inset_result = inset_re.match(document.body[i])
289 insets.append(inset_result.group(1))
292 elif find_token(document.body, "\\end_inset", i, i + 1) == i:
# Only lines whose effective encoding differs from the document
# encoding need re-coding.
294 if encoding_stack[-1] != document.encoding:
296 # This line has been incorrectly interpreted as if it was
297 # encoded in 'encoding'.
298 # Convert back to the 8bit string that was in the file.
299 orig = document.body[i].encode(document.encoding)
300 # Convert the 8bit string that was in the file to unicode
301 # with the correct encoding.
302 document.body[i] = orig.decode(encoding_stack[-1])
305 # Convert unicode to the 8bit string that will be written
306 # to the file with the correct encoding.
307 orig = document.body[i].encode(encoding_stack[-1])
308 # Convert the 8bit string that will be written to the
309 # file to fake unicode with the encoding that will later
310 # be used when writing to the file.
311 document.body[i] = orig.decode(document.encoding)
# Fallback when re-coding fails: replace unencodable characters by
# ERT/math commands; the replacement may span several lines.
313 mod_line = revert_unicode_line(document, i, insets, spec_chars)
314 document.body[i:i+1] = mod_line.split('\n')
315 i += len(mod_line.split('\n')) - 1
def convert_utf8(document):
    """Switch the document to UTF-8.

    The body is transcoded first, while document.encoding still holds the
    old (format 248) value that convert_multiencoding relies on; only then
    is the encoding attribute updated.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
# NOTE(review): some original lines are elided in this excerpt (the -1
# check after find_token); comments cover visible code only.
325 def revert_utf8(document):
326 " Set document encoding to the value corresponding to inputencoding. "
327 i = find_token(document.header, "\\inputencoding", 0)
329 document.header.append("\\inputencoding auto")
# utf8 does not exist before format 249; fall back to auto.
330 elif get_value(document.header, "\\inputencoding", i) == "utf8":
331 document.header[i] = "\\inputencoding auto"
332 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# Recompute the effective 8-bit encoding for format 248, then transcode
# the body back to it.
333 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
334 convert_multiencoding(document, False)
# NOTE(review): some original lines are elided in this excerpt (the
# spec_chars initialization, comment-line skipping, the return); comments
# cover visible code only. Python 2 only (unichr).
337 def read_unicodesymbols():
338 " Read the unicodesymbols list of unicode characters and corresponding commands."
339 pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
# NOTE(review): str.strip('lyx2lyx') removes any of the characters
# l,y,x,2 from *both ends* of the path, not the 'lyx2lyx' suffix — works
# for the usual install layout but is fragile; verify against upstream.
# The file handle is never closed in the visible code.
340 fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
342 for line in fp.readlines():
344 line=line.replace(' "',' ') # remove all quotation marks with spaces before
345 line=line.replace('" ',' ') # remove all quotation marks with spaces after
346 line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
348 # flag1 and flag2 are preamble and other flags
349 [ucs4,command,flag1,flag2] =line.split(None,3)
# NOTE(review): eval() of file content — acceptable only because the
# unicodesymbols file ships with LyX itself; do not reuse on untrusted input.
350 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
# NOTE(review): many original lines are elided in this excerpt (mod_line
# setup, try/except framing, several else branches, the return); comments
# cover visible code only. Python 2 only (has_key).
357 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
358 # Define strings to start and end ERT and math insets
359 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
360 ert_outro='\n\\end_layout\n\n\\end_inset\n'
361 math_intro='\n\\begin_inset Formula $'
362 math_outro='$\n\\end_inset'
# Remember the last character of the previous body line so a combining
# accent at the start of this line can attach to it.
365 if i and not is_inset_line(document, i-1):
366 last_char = document.body[i - 1][-1:]
370 line = document.body[i]
371 for character in line:
373 # Try to write the character
374 dummy = character.encode(document.encoding)
375 mod_line += character
376 last_char = character
378 # Try to replace with ERT/math inset
379 if spec_chars.has_key(character):
380 command = spec_chars[character][0] # the command to replace unicode
381 flag1 = spec_chars[character][1]
382 flag2 = spec_chars[character][2]
383 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
384 # We have a character that should be combined with the previous
385 command += '{' + last_char + '}'
386 # Remove the last character. Ignore if it is whitespace
387 if len(last_char.rstrip()):
388 # last_char was found and is not whitespace
390 mod_line = mod_line[:-1]
391 else: # last_char belongs to the last line
392 document.body[i-1] = document.body[i-1][:-1]
394 # The last character was replaced by a command. For now it is
395 # ignored. This could be handled better.
397 if command[0:2] == '\\\\':
398 if command[2:12]=='ensuremath':
399 if insets and insets[-1] == "ERT":
# Inside ERT: switch to inline math with escaped backslash.
401 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
402 command = command.replace('}', '$\n')
403 elif not insets or insets[-1] != "Formula":
404 # add a math inset with the replacement character
405 command = command.replace('\\\\ensuremath{\\', math_intro)
406 command = command.replace('}', math_outro)
408 # we are already in a math inset
409 command = command.replace('\\\\ensuremath{\\', '')
410 command = command.replace('}', '')
412 if insets and insets[-1] == "Formula":
413 # avoid putting an ERT in a math; instead put command as text
414 command = command.replace('\\\\', '\mathrm{')
415 command = command + '}'
416 elif not insets or insets[-1] != "ERT":
417 # add an ERT inset with the replacement character
418 command = command.replace('\\\\', ert_intro)
419 command = command + ert_outro
421 command = command.replace('\\\\', '\n\\backslash\n')
422 last_char = '' # indicate that the character should not be removed
425 # Replace with replacement string
426 mod_line += replacement_character
# NOTE(review): some original lines are elided in this excerpt (docstring
# tail, i = 0 setup, try/except framing, increments); comments cover
# visible code only.
430 def revert_unicode(document):
431 '''Transform unicode characters that can not be written using the
432 document encoding to commands according to the unicodesymbols
433 file. Characters that can not be replaced by commands are replaced by
434 a replacement string. Flags other than 'combined' are currently not
436 spec_chars = read_unicodesymbols()
437 insets = [] # list of active insets
439 # Go through the document to capture all combining characters
441 while i < len(document.body):
442 line = document.body[i]
# Track the inset nesting so revert_unicode_line knows its context.
444 if line.find('\\begin_inset') > -1:
445 insets.append(line[13:].split()[0])
446 if line.find('\\end_inset') > -1:
449 # Try to write the line
451 # If all goes well the line is written here
452 dummy = line.encode(document.encoding)
455 # Error, some character(s) in the line need to be replaced
456 mod_line = revert_unicode_line(document, i, insets, spec_chars)
457 document.body[i:i+1] = mod_line.split('\n')
458 i += len(mod_line.split('\n'))
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# -1 check, the line deletion and increments); comments cover visible
# code only.
461 def revert_cs_label(document):
462 " Remove status flag of charstyle label. "
465 i = find_token(document.body, "\\begin_inset CharStyle", i)
468 # Search for a line starting 'show_label'
469 # If it is not there, break with a warning message
472 if (document.body[i][:10] == "show_label"):
475 elif (document.body[i][:13] == "\\begin_layout"):
476 document.warning("Malformed LyX document: Missing 'show_label'.")
# NOTE(review): some original lines are elided in this excerpt (docstring
# fragments, loop setup, -1 check, increments); comments cover visible
# code only. Converts a raw \bibitem line into a LatexCommand inset:
#   \bibitem [option]{argument}  ->  \begin_inset LatexCommand bibitem ...
483 def convert_bibitem(document):
485 \bibitem [option]{argument}
489 \begin_inset LatexCommand bibitem
495 This must be called after convert_commandparams.
499 i = find_token(document.body, "\\bibitem", i)
# Extract the bracketed optional argument, if any.
502 j = document.body[i].find('[') + 1
503 k = document.body[i].rfind(']')
504 if j == 0: # No optional argument found
507 option = document.body[i][j:k]
# Extract the mandatory {argument}.
508 j = document.body[i].rfind('{') + 1
509 k = document.body[i].rfind('}')
510 argument = document.body[i][j:k]
511 lines = ['\\begin_inset LatexCommand bibitem']
# Double quotes inside values are escaped for the inset parameter syntax.
513 lines.append('label "%s"' % option.replace('"', '\\"'))
514 lines.append('key "%s"' % argument.replace('"', '\\"'))
516 lines.append('\\end_inset')
517 document.body[i:i+1] = lines
# command -> [option1 name, option2 name, argument name] for every
# LatexCommand inset understood by convert_commandparams /
# revert_commandparams. An empty string means the slot is invalid for
# that command.
commandparams_info = {
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "hfill" : ["", "", ""],
    "index" : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url" : ["name", "", "target"]}
# All natbib/jurabib-style citation variants share the same three slots.
commandparams_info.update(
    (cmd, ["after", "before", "key"])
    for cmd in ("cite", "citet", "citep", "citealt", "citealp",
                "citeauthor", "citeyear", "citeyearpar",
                "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
                "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
                "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
                "citefield", "citetitle", "cite*"))
# All cross-reference commands take an optional name and a reference.
commandparams_info.update(
    (cmd, ["name", "", "reference"])
    for cmd in ("eqref", "pageref", "prettyref", "ref", "vpageref", "vref"))
# NOTE(review): many original lines are elided in this excerpt (docstring
# fragments, loop setup, state-machine variable initialization, several
# transition/accumulation lines); comments cover visible code only.
566 def convert_commandparams(document):
569 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
574 \begin_inset LatexCommand cmdname
580 name1, name2 and name3 can be different for each command.
582 # \begin_inset LatexCommand bibitem was not the official version (see
583 # convert_bibitem()), but could be read in, so we convert it here, too.
587 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# Everything after the 26-char prefix is the raw \cmd[...]{...} text,
# possibly continued on following lines up to \end_inset.
590 command = document.body[i][26:].strip()
592 document.warning("Malformed LyX document: Missing LatexCommand name.")
596 j = find_token(document.body, "\\end_inset", i + 1)
598 document.warning("Malformed document")
600 command += "".join(document.body[i+1:j])
601 document.body[i+1:j] = []
603 # The following parser is taken from the original InsetCommandParams::scanCommand
609 # Used to handle things like \command[foo[bar]]{foo{bar}}
# Character-by-character state machine over the raw command text.
613 if ((state == "CMDNAME" and c == ' ') or
614 (state == "CMDNAME" and c == '[') or
615 (state == "CMDNAME" and c == '{')):
617 if ((state == "OPTION" and c == ']') or
618 (state == "SECOPTION" and c == ']') or
619 (state == "CONTENT" and c == '}')):
623 nestdepth = nestdepth - 1
624 if ((state == "OPTION" and c == '[') or
625 (state == "SECOPTION" and c == '[') or
626 (state == "CONTENT" and c == '{')):
627 nestdepth = nestdepth + 1
628 if state == "CMDNAME":
630 elif state == "OPTION":
632 elif state == "SECOPTION":
634 elif state == "CONTENT":
# 'b' is presumably the previous delimiter — a second '[' directly after
# ']' starts the second option. TODO confirm against the elided lines.
639 elif c == '[' and b != ']':
641 nestdepth = 0 # Just to be sure
642 elif c == '[' and b == ']':
644 nestdepth = 0 # Just to be sure
647 nestdepth = 0 # Just to be sure
650 # Now we have parsed the command, output the parameters
651 lines = ["\\begin_inset LatexCommand %s" % name]
# Emit each parsed slot under its per-command parameter name from
# commandparams_info; empty name means the slot is invalid for this command.
653 if commandparams_info[name][0] == "":
654 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
656 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
658 if commandparams_info[name][1] == "":
659 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
661 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
663 if commandparams_info[name][2] == "":
664 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
666 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
667 document.body[i:i+1] = lines
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# default initialization of option1/option2/argument/preview_line, the
# if/elif conditions selecting the output format); comments cover visible
# code only. Inverse of convert_commandparams: re-serializes parameter
# lines back into the one-line \cmd[opt1][opt2]{arg} form.
671 def revert_commandparams(document):
672 regex = re.compile(r'(\S+)\s+(.+)')
675 i = find_token(document.body, "\\begin_inset LatexCommand", i)
678 name = document.body[i].split()[2]
679 j = find_end_of_inset(document.body, i + 1)
# Collect the named parameters of the inset, unescaping \" back to ".
684 for k in range(i + 1, j):
685 match = re.match(regex, document.body[k])
687 pname = match.group(1)
688 pvalue = match.group(2)
689 if pname == "preview":
690 preview_line = document.body[k]
691 elif (commandparams_info[name][0] != "" and
692 pname == commandparams_info[name][0]):
693 option1 = pvalue.strip('"').replace('\\"', '"')
694 elif (commandparams_info[name][1] != "" and
695 pname == commandparams_info[name][1]):
696 option2 = pvalue.strip('"').replace('\\"', '"')
697 elif (commandparams_info[name][2] != "" and
698 pname == commandparams_info[name][2]):
699 argument = pvalue.strip('"').replace('\\"', '"')
700 elif document.body[k].strip() != "":
701 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
# bibitem reverts to a raw \bibitem line (no inset); everything else stays
# an inset with the options inlined into the command.
702 if name == "bibitem":
704 lines = ["\\bibitem {%s}" % argument]
706 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
710 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
712 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
715 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
717 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
718 if name != "bibitem":
719 if preview_line != "":
720 lines.append(preview_line)
722 lines.append('\\end_inset')
723 document.body[i:j+1] = lines
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# default initialization of symbol/description/prefix/use_nomencl, most of
# the ERT replacement list); comments cover visible code only.
727 def revert_nomenclature(document):
728 " Convert nomenclature entry to ERT. "
729 regex = re.compile(r'(\S+)\s+(.+)')
733 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
737 j = find_end_of_inset(document.body, i + 1)
# Collect the inset's named parameters, unescaping \" back to ".
742 for k in range(i + 1, j):
743 match = re.match(regex, document.body[k])
745 name = match.group(1)
746 value = match.group(2)
747 if name == "preview":
748 preview_line = document.body[k]
749 elif name == "symbol":
750 symbol = value.strip('"').replace('\\"', '"')
751 elif name == "description":
752 description = value.strip('"').replace('\\"', '"')
753 elif name == "prefix":
754 prefix = value.strip('"').replace('\\"', '"')
755 elif document.body[k].strip() != "":
756 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
# Build the raw LaTeX \nomenclature call and wrap it in an ERT inset.
758 command = 'nomenclature{%s}{%s}' % (symbol, description)
760 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
761 document.body[i:j+1] = ['\\begin_inset ERT',
764 '\\begin_layout %s' % document.default_layout,
# Make sure the preamble loads nomencl (once) when any entry was reverted.
773 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
774 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
775 document.preamble.append('\\makenomenclature')
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# default initialization of labelwidth/use_nomencl, most of the ERT
# replacement list); comments cover visible code only.
778 def revert_printnomenclature(document):
779 " Convert printnomenclature to ERT. "
780 regex = re.compile(r'(\S+)\s+(.+)')
784 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
788 j = find_end_of_inset(document.body, i + 1)
# Collect the inset's named parameters, unescaping \" back to ".
791 for k in range(i + 1, j):
792 match = re.match(regex, document.body[k])
794 name = match.group(1)
795 value = match.group(2)
796 if name == "preview":
797 preview_line = document.body[k]
798 elif name == "labelwidth":
799 labelwidth = value.strip('"').replace('\\"', '"')
800 elif document.body[k].strip() != "":
801 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
# Build the raw LaTeX call and wrap it in an ERT inset.
803 command = 'nomenclature{}'
805 command = 'nomenclature[%s]' % labelwidth
806 document.body[i:j+1] = ['\\begin_inset ERT',
809 '\\begin_layout %s' % document.default_layout,
# Make sure the preamble loads nomencl (once) when the inset was reverted.
818 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
819 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
820 document.preamble.append('\\makenomenclature')
# NOTE(review): the -1 check and return after find_token are elided in
# this excerpt; comments cover visible code only.
823 def convert_esint(document):
824 " Add \\use_esint setting to header. "
# \use_esint is inserted right before the \cite_engine header line.
825 i = find_token(document.header, "\\cite_engine", 0)
827 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
829 # 0 is off, 1 is auto, 2 is on.
830 document.header.insert(i, '\\use_esint 0')
# NOTE(review): the -1 check/return and the condition guarding the
# preamble append (presumably use_esint == '2', i.e. "on") are elided in
# this excerpt; comments cover visible code only.
833 def revert_esint(document):
834 " Remove \\use_esint setting from header. "
835 i = find_token(document.header, "\\use_esint", 0)
837 document.warning("Malformed LyX document: Missing `\\use_esint'.")
839 use_esint = document.header[i].split()[1]
840 del document.header[i]
841 # 0 is off, 1 is auto, 2 is on.
843 document.preamble.append('\\usepackage{esint}')
# NOTE(review): most of this function is elided in this excerpt (docstring,
# loop setup, -1 check, the rest of the ERT replacement list); comments
# cover visible code only. Replaces a \clearpage line with an ERT inset.
846 def revert_clearpage(document):
850 i = find_token(document.body, "\\clearpage", i)
853 document.body[i:i+1] = ['\\begin_inset ERT',
856 '\\begin_layout %s' % document.default_layout,
# NOTE(review): most of this function is elided in this excerpt (loop
# setup, -1 check, the rest of the ERT replacement list); comments cover
# visible code only. Replaces a \cleardoublepage line with an ERT inset.
867 def revert_cleardoublepage(document):
868 " cleardoublepage -> ERT "
871 i = find_token(document.body, "\\cleardoublepage", i)
874 document.body[i:i+1] = ['\\begin_inset ERT',
877 '\\begin_layout %s' % document.default_layout,
# NOTE(review): some original lines are elided in this excerpt (the i = 0
# setup, the -1 break, increments); comments cover visible code only.
888 def convert_lyxline(document):
889 " remove fontsize commands for \lyxline "
890 # Problem: the old \lyxline definition ignored the font size, while the
891 # new definition uses it to set the line thickness, so imported
892 # \lyxlines would come out with a different thickness. Any fontsize
893 # command directly before a \lyxline is therefore removed to keep the
894 # output unchanged.
894 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
895 "large", "Large", "LARGE", "huge", "Huge"]
896 for n in range(0, len(fontsizes)):
899 while i < len(document.body):
900 i = find_token(document.body, "\\size " + fontsizes[n], i)
901 k = find_token(document.body, "\\lyxline", i)
902 # the corresponding fontsize command is always 2 lines before the \lyxline
903 if (i != -1 and k == i+2):
904 document.body[i:i+1] = []
# NOTE(review): the if/else lines around the -1 check are elided in this
# excerpt; comments cover visible code only.
910 def revert_encodings(document):
911 " Set new encodings to auto. "
# Input encodings introduced after format 248; older LyX cannot read them,
# so they are downgraded to auto.
912 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
913 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
914 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
915 i = find_token(document.header, "\\inputencoding", 0)
917 document.header.append("\\inputencoding auto")
919 inputenc = get_value(document.header, "\\inputencoding", i)
920 if inputenc in encodings:
921 document.header[i] = "\\inputencoding auto"
922 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# -1 checks, increments); comments cover visible code only.
925 def convert_caption(document):
926 " Convert caption layouts to caption insets. "
929 i = find_token(document.body, "\\begin_layout Caption", i)
932 j = find_end_of_layout(document.body, i)
934 document.warning("Malformed LyX document: Missing `\\end_layout'.")
# Wrap the old Caption paragraph into a Caption inset hosted inside a
# default-layout paragraph (close the inner layout/inset at j, open the
# outer layout + inset at i).
937 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
938 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
939 "\\begin_inset Caption", "",
940 "\\begin_layout %s" % document.default_layout]
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# several if/else and increment lines); comments cover visible code only.
944 def revert_caption(document):
945 " Convert caption insets to caption layouts. "
946 " This assumes that the text class has a caption style. "
949 i = find_token(document.body, "\\begin_inset Caption", i)
953 # We either need to delete the previous \begin_layout line, or we
954 # need to end the previous layout if this inset is not in the first
955 # position of the paragraph.
956 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
957 if layout_before == -1:
958 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
960 layout_line = document.body[layout_before]
961 del_layout_before = True
962 l = layout_before + 1
# Non-empty lines between the enclosing \begin_layout and the inset mean
# the inset is not at paragraph start, so the layout must stay.
964 if document.body[l] != "":
965 del_layout_before = False
968 if del_layout_before:
969 del document.body[layout_before:i]
972 document.body[i:i] = ["\\end_layout", ""]
975 # Find start of layout in the inset and end of inset
976 j = find_token(document.body, "\\begin_layout", i)
978 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
980 k = find_end_of_inset(document.body, i)
982 document.warning("Malformed LyX document: Missing `\\end_inset'.")
985 # We either need to delete the following \end_layout line, or we need
986 # to restart the old layout if this inset is not at the paragraph end.
987 layout_after = find_token(document.body, "\\end_layout", k)
988 if layout_after == -1:
989 document.warning("Malformed LyX document: Missing `\\end_layout'.")
991 del_layout_after = True
993 while l < layout_after:
994 if document.body[l] != "":
995 del_layout_after = False
999 del document.body[k+1:layout_after+1]
1001 document.body[k+1:k+1] = [layout_line, ""]
1003 # delete \begin_layout and \end_inset and replace \begin_inset with
1004 # "\begin_layout Caption". This works because we can only have one
1005 # paragraph in the caption inset: The old \end_layout will be recycled.
1006 del document.body[k]
1007 if document.body[k] == "":
1008 del document.body[k]
1009 del document.body[j]
1010 if document.body[j] == "":
1011 del document.body[j]
1012 document.body[i] = "\\begin_layout Caption"
1013 if document.body[i+1] == "":
1014 del document.body[i+1]
1018 # Accents of InsetLaTeXAccent
# Maps a LyX accent command letter to the corresponding Unicode combining
# character (used by _convert_accent below).
# NOTE(review): the opening `accent_map = {` line and the closing brace
# are elided in this excerpt.
1020 "`" : u'\u0300', # grave
1021 "'" : u'\u0301', # acute
1022 "^" : u'\u0302', # circumflex
1023 "~" : u'\u0303', # tilde
1024 "=" : u'\u0304', # macron
1025 "u" : u'\u0306', # breve
1026 "." : u'\u0307', # dot above
1027 "\"": u'\u0308', # diaeresis
1028 "r" : u'\u030a', # ring above
1029 "H" : u'\u030b', # double acute
1030 "v" : u'\u030c', # caron
1031 "b" : u'\u0320', # minus sign below
1032 "d" : u'\u0323', # dot below
1033 "c" : u'\u0327', # cedilla
1034 "k" : u'\u0328', # ogonek
1035 "t" : u'\u0361' # tie. This is special: It spans two characters, but
1036 # only one is given as argument, so we don't need to
1037 # treat it differently.
1041 # special accents of InsetLaTeXAccent without argument
# Accent letters that stand for a whole character on their own (no
# argument), mapped to the Unicode character they produce.
# NOTE(review): the closing brace of this dict literal is elided in this
# excerpt.
1042 special_accent_map = {
1043 'i' : u'\u0131', # dotless i
1044 'j' : u'\u0237', # dotless j
1045 'l' : u'\u0142', # l with stroke
1046 'L' : u'\u0141' # L with stroke
# special accent arguments of InsetLaTeXAccent: LaTeX commands that may
# appear as the *accented* character inside an accent inset.
# NOTE(review): the `accented_map = {` opener and closing brace were dropped
# from this listing; the name is grounded by the uses in _convert_accent and
# revert_accent (inverse_accented_map).
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237'  # dotless j
}
def _convert_accent(accent, accented_char):
    """Return the NFC-normalized Unicode string for an InsetLaTeXAccent
    combination, or '' if the combination cannot be converted.

    `accent` is the accent command letter (key of accent_map), possibly a
    special stand-alone accent (key of special_accent_map) when
    `accented_char` is empty; `accented_char` is the base character or a
    command from accented_map.
    """
    # NOTE(review): several control-flow lines of this function were dropped
    # from the listing (the empty-char branch, `type = 'v'`, the early
    # returns); they are restored here to match the surviving lines — verify
    # against the project history.
    type = accent
    char = accented_char
    if char == '':
        if type in special_accent_map:
            return special_accent_map[type]
        # a missing char is treated as space by LyX
        char = ' '
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        type = 'v'
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
        return ''
    a = accent_map.get(type)
    if a:
        return unicodedata.normalize("NFC", "%s%s" % (char, a))
    return ''
# Purpose: turn raw ERT text into valid LyX ERT lines, escaping backslashes
# and splitting paragraphs at '\n'.
# NOTE(review): this listing dropped several lines of the function (the
# `for c in ert:` loop header, the backslash-branch bookkeeping, the '\n'
# branch header, and the final `return i`); the surviving lines are kept
# byte-identical below. Do not assume the visible lines are consecutive.
1084 def convert_ertbackslash(body, i, ert, default_layout):
1085 r""" -------------------------------------------------------------------------------------------
1086 Convert backslashes and '\n' into valid ERT code, append the converted
1087 text to body[i] and return the (maybe incremented) line index i"""
# Backslash branch: emit LyX's escaped form.
1091 body[i] = body[i] + '\\backslash '
# Newline branch: close the current layout and open a fresh one.
1095 body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
# Default branch: plain characters are appended verbatim.
1098 body[i] = body[i] + c
# Purpose: replace InsetLaTeXAccent markup ('\i \"{a}' etc.) in the body by
# the equivalent precomposed Unicode character, falling back to ERT when the
# accent combination is unknown.
# NOTE(review): this listing dropped lines (loop header, guards, the ERT
# boilerplate entries of the inserted lists); code kept byte-identical.
1102 def convert_accent(document):
1103 # The following forms are supported by LyX:
1104 # '\i \"{a}' (standard form, as written by LyX)
1105 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1106 # '\i \"{ }' (also accepted if the accented char is a space)
1107 # '\i \" a' (also accepted)
1108 # '\i \"' (also accepted)
1109 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
1110 re_contents = re.compile(r'^([^\s{]+)(.*)$')
1111 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
1114 i = find_re(document.body, re_wholeinset, i)
1117 match = re_wholeinset.match(document.body[i])
1118 prefix = match.group(1)
1119 contents = match.group(3).strip()
1120 match = re_contents.match(contents)
1122 # Strip first char (always \)
1123 accent = match.group(1)[1:]
1124 accented_contents = match.group(2).strip()
1125 match = re_accentedcontents.match(accented_contents)
1126 accented_char = match.group(1)
1127 converted = _convert_accent(accent, accented_char)
1129 # Normalize contents
# NOTE(review): the trailing comma below makes `contents` a 1-tuple, which
# would garble the warning message at line 1135 — likely a transcription
# artifact of this listing; verify against the original file.
1130 contents = '%s{%s}' % (accent, accented_char),
# Success path: splice the converted character back into the line.
1132 document.body[i] = '%s%s' % (prefix, converted)
# Failure path: keep the prefix and re-emit the inset as ERT.
1135 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1136 document.body[i] = prefix
1137 document.body[i+1:i+1] = ['\\begin_inset ERT',
1140 '\\begin_layout %s' % document.default_layout,
1144 i = convert_ertbackslash(document.body, i + 7,
1146 document.default_layout)
1147 document.body[i+1:i+1] = ['\\end_layout',
def is_inset_line(document, i):
    """ Line i of body has an inset """
    # A line starting with a backslash is itself inset/command markup.
    # NOTE(review): the `return True` below was dropped from this listing;
    # restored (without it the guard would fall through uselessly).
    if document.body[i][:1] == '\\':
        return True
    # Otherwise look for a backslash in the last two whitespace-separated
    # tokens, which is where an inline inset marker would appear.
    last_tokens = "".join(document.body[i].split()[-2:])
    return last_tokens.find('\\') != -1
# Purpose: replace accented Unicode characters that the target encoding
# cannot represent by InsetLaTeXAccent markup ('\i \accent{char}').
# NOTE(review): this listing dropped lines throughout (blank lines, `try:`
# headers for the encode probes, `continue`s, `else:` branches, `i = i + 1`
# style counters); the surviving lines are kept byte-identical below and are
# NOT consecutive — consult the original file before editing logic.
1161 def revert_accent(document):
# Build the inverse of each accent table so Unicode chars map back to
# the LaTeX accent letters.
1162 inverse_accent_map = {}
1163 for k in accent_map:
1164 inverse_accent_map[accent_map[k]] = k
1165 inverse_special_accent_map = {}
1166 for k in special_accent_map:
1167 inverse_special_accent_map[special_accent_map[k]] = k
1168 inverse_accented_map = {}
1169 for k in accented_map:
1170 inverse_accented_map[accented_map[k]] = k
1172 # Since LyX may insert a line break within a word we must combine all
1173 # words before unicode normalization.
1174 # We do this only if the next line starts with an accent, otherwise we
1175 # would create things like '\begin_inset ERTstatus'.
1176 for i in range(len(document.body) - 1):
1177 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1179 if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
1180 # the last character of this line and the first of the next line
1181 # form probably a surrogate pair, inline insets are excluded (second part of the test)
1182 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1183 document.body[i] += document.body[i+1][0]
1184 document.body[i+1] = document.body[i+1][1:]
1186 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1187 # This is needed to catch all accented characters.
1188 for i in range(len(document.body)):
1189 # Unfortunately we have a mixture of unicode strings and plain strings,
1190 # because we never use u'xxx' for string literals, but 'xxx'.
1191 # Therefore we may have to try two times to normalize the data.
1193 document.body[i] = unicodedata.normalize("NFD", document.body[i])
# Fallback for byte strings: decode as UTF-8 first (Python-2 idiom).
1195 document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
1197 # Replace accented characters with InsetLaTeXAccent
1198 # Do not convert characters that can be represented in the chosen
# encoding_stack tracks the effective encoding per nested layout.
1200 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1201 lang_re = re.compile(r"^\\lang\s(\S+)")
1204 while i < len(document.body):
1205 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1206 # Track the encoding of the current line
1207 result = lang_re.match(document.body[i])
1209 language = result.group(1)
1210 if language == "default":
1211 encoding_stack[-1] = document.encoding
# For explicit languages the encoding comes from the language table.
1213 from lyx2lyx_lang import lang
1214 encoding_stack[-1] = lang[language][3]
# Push/pop the encoding at layout boundaries.
1216 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1217 encoding_stack.append(encoding_stack[-1])
1219 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1220 del encoding_stack[-1]
1223 for j in range(len(document.body[i])):
1224 # dotless i and dotless j are both in special_accent_map and can
1225 # occur as an accented character, so we need to test that the
1226 # following character is no accent
1227 if (document.body[i][j] in inverse_special_accent_map and
1228 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1229 accent = document.body[i][j]
# Probe whether the target encoding can represent the character;
# only on failure is the inset inserted (inside a dropped try/except).
1231 dummy = accent.encode(encoding_stack[-1])
1232 except UnicodeEncodeError:
1233 # Insert the rest of the line as new line
1234 if j < len(document.body[i]) - 1:
1235 document.body.insert(i+1, document.body[i][j+1:])
1236 # Delete the accented character
1237 document.body[i] = document.body[i][:j]
1238 # Finally add the InsetLaTeXAccent
1239 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1241 elif j > 0 and document.body[i][j] in inverse_accent_map:
1242 accented_char = document.body[i][j-1]
1243 if accented_char == ' ':
1244 # Conform to LyX output
1246 elif accented_char in inverse_accented_map:
1247 accented_char = inverse_accented_map[accented_char]
1248 accent = document.body[i][j]
1250 dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1251 except UnicodeEncodeError:
1252 # Insert the rest of the line as new line
1253 if j < len(document.body[i]) - 1:
1254 document.body.insert(i+1, document.body[i][j+1:])
1255 # Delete the accented characters
1256 document.body[i] = document.body[i][:j-1]
1257 # Finally add the InsetLaTeXAccent
1258 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1262 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1263 for i in range(len(document.body)):
1264 document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # NOTE(review): two dict entries were dropped from this listing (internal
    # lines 1274 and 1276); "\\color" and "\\bar" restored here — verify
    # against the project history.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Move whitespace out of \\lang font-change sequences (format 274).

    Format 259 already transferred leading/trailing whitespace out of most
    font-change sequences, but the language property was overlooked there;
    this applies the identical normalization for \\lang changes.
    """
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    # First non-empty line after the paragraph's \begin_layout.
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    # NOTE(review): the `return words[1]` branch was dropped from this
    # listing; restored — it is the only value the guarded branch can yield.
    if len(words) > 1 and words[0] == "\\lang":
        return words[1]
    else:
        return document.language
# Purpose: shared worker for normalize_font_whitespace_259/_274 — moves a
# space that opens or closes a font-change sequence outside the sequence,
# re-emitting the necessary property set/reset lines around it.
# NOTE(review): this listing dropped many lines (the `return` of the backend
# guard, counter initialisations, `changes = {}`, `else:`/`continue`
# branches, the guard conditions inside the two `for k in changes` loops);
# the surviving lines are kept byte-identical and are NOT consecutive.
# The statement order here is load-bearing (lines are inserted while the
# index walks the list) — do not restyle without the full original.
1309 def normalize_font_whitespace(document, char_properties):
1310 """ Before format 259 the font changes were ignored if a
1311 whitespace was the first or last character in the sequence, this function
1312 transfers the whitespace outside. Only a change in one of the properties
1313 in the provided char_properties is handled by this function."""
# Only the LaTeX backend is affected.
1315 if document.backend != "latex":
1318 lines = document.body
1323 while i < len(lines):
1324 words = lines[i].split()
1326 if len(words) > 0 and words[0] == "\\begin_layout":
1327 # a new paragraph resets all font changes
1329 # also reset the default language to be the paragraph's language
1330 if "\\lang" in char_properties.keys():
1331 char_properties["\\lang"] = \
1332 get_paragraph_language(document, i + 1)
1334 elif len(words) > 1 and words[0] in char_properties.keys():
1335 # we have a font change
1336 if char_properties[words[0]] == words[1]:
1337 # property gets reset
1338 if words[0] in changes.keys():
1339 del changes[words[0]]
1340 defaultproperty = True
# Non-default value: record it as an open change.
1343 changes[words[0]] = words[1]
1344 defaultproperty = False
1346 # We need to explicitly reset all changed properties if we find
1347 # a space below, because LyX 1.4 would output the space after
1348 # closing the previous change and before starting the new one,
1349 # and closing a font change means to close all properties, not
1350 # just the changed one.
1352 if lines[i-1] and lines[i-1][-1] == " ":
1353 lines[i-1] = lines[i-1][:-1]
1354 # a space before the font change
1356 for k in changes.keys():
1357 # exclude property k because that is already in lines[i]
1359 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1360 for k in changes.keys():
1361 # exclude property k because that must be added below anyway
1363 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1365 # Property is reset in lines[i], so add the new stuff afterwards
1366 lines[i+1:i+1] = added_lines
1368 # Reset property for the space
1369 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1370 lines[i:i] = added_lines
1371 i = i + len(added_lines)
1373 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1374 # a space after the font change
1375 if (lines[i+1] == " " and lines[i+2]):
1376 next_words = lines[i+2].split()
1377 if len(next_words) > 0 and next_words[0] == words[0]:
1378 # a single blank with a property different from the
1379 # previous and the next line must not be changed
# Strip the leading space from the following line.
1382 lines[i+1] = lines[i+1][1:]
1384 for k in changes.keys():
1385 # exclude property k because that is already in lines[i]
1387 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1388 for k in changes.keys():
1389 # exclude property k because that must be added below anyway
1391 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1392 # Reset property for the space
1393 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1394 lines[i:i] = added_lines
1395 i = i + len(added_lines)
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): the `if i == -1:`/`else:` guards were dropped from this
    # listing; restored to match the append-vs-replace pattern of the
    # surviving lines.
    if i == -1:
        # No explicit encoding line: fall back to the old default.
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): `if i == -1:`/`else:` guards restored (dropped from this
    # listing), matching the sibling revert_utf8x.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# Purpose: replace beamer's Alert character style inset by an ERT inset that
# wraps the first layout's text in \alert{...}.
# NOTE(review): this listing dropped the outer search loop, its termination
# guard, and the inner scan loop's counters; the surviving lines are kept
# byte-identical and are NOT consecutive.
1424 def revert_beamer_alert(document):
1425 " Revert beamer's \\alert inset back to ERT. "
1428 i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
# Turn the CharStyle inset into an ERT inset in place.
1431 document.body[i] = "\\begin_inset ERT"
1434 if (document.body[i][:13] == "\\begin_layout"):
1435 # Insert the \alert command
1436 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
# Purpose: replace beamer's Structure character style inset by an ERT inset
# wrapping the first layout's text in \structure{...}; mirrors
# revert_beamer_alert above.
# NOTE(review): loop headers/guards dropped from this listing; code kept
# byte-identical, lines NOT consecutive.
1443 def revert_beamer_structure(document):
1444 " Revert beamer's \\structure inset back to ERT. "
1447 i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1450 document.body[i] = "\\begin_inset ERT"
1453 if (document.body[i][:13] == "\\begin_layout"):
1454 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    i = find_token(document.header, '\\tracking_changes', 0)
    # NOTE(review): the `if == -1: ... return` guards around the two warnings
    # were dropped from this listing; restored (the warnings are only
    # meaningful on the not-found path).
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # Showing changes without tracking them is inconsistent; turn it off.
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): `if i == -1:`/`else:` guards restored (dropped from this
    # listing), matching the sibling revert_utf8x/revert_utf8plain.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    "Replace obsolete language names (brazil, portuges) by current ones."
    new_name = {"brazil": "brazilian", "portuges": "portuguese"}.get(document.language)
    if new_name:
        document.language = new_name
        pos = find_token(document.header, "\\language", 0)
        document.header[pos] = "\\language %s" % new_name
def revert_language_name(document):
    "Restore the old language names (brazil, portuges) for older formats."
    old_name = {"brazilian": "brazil", "portuguese": "portuges"}.get(document.language)
    if old_name:
        document.language = old_name
        pos = find_token(document.header, "\\language", 0)
        document.header[pos] = "\\language %s" % old_name
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    "Rename the old 'cv' text class to its successor 'simplecv'."
    renames = {"cv": "simplecv"}
    document.textclass = renames.get(document.textclass, document.textclass)
def revert_cv_textclass(document):
    "Map the 'simplecv' text class back to its old name 'cv'."
    renames = {"simplecv": "cv"}
    document.textclass = renames.get(document.textclass, document.textclass)
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    # NOTE(review): the search loop, its `if i == -1: return` guard and the
    # `i = i + 1` counter were dropped from this listing; restored here.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Seach for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            document.body.insert(j, 'scaleBeforeRotation')
        i = i + 1
# Purpose: drop the scaleBeforeRotation graphics parameter; when it is
# absent but rotateAngle plus a size parameter are present, fold the angle
# into the `special` parameter instead.
# NOTE(review): this listing dropped the search loop, its guards, the
# `else:` branch structure around the special-parameter handling and the
# loop counter; the surviving lines are kept byte-identical and are NOT
# consecutive — recover the exact branch nesting from the original file.
1543 # remove scaleBeforeRotation graphics param
1544 def revert_graphics_rotation(document):
1545 " remove scaleBeforeRotation graphics parameter. "
1548 i = find_token(document.body, "\\begin_inset Graphics", i)
1551 j = find_end_of_inset(document.body, i + 1)
1554 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1555 # If there's a scaleBeforeRotation param, just remove that
1556 k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1558 del document.body[k]
1560 # if not, and if we have rotateAngle and width or height or scale,
1561 # we have to put the rotateAngle value to special
1562 rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1563 special = get_value(document.body, 'special', i + 1, j)
1564 if rotateAngle != "":
1565 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
# No pre-existing special parameter: insert a fresh one.
1569 document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
# Otherwise prepend the angle to the existing special value.
1571 l = find_token(document.body, "\tspecial", i + 1, j)
1572 document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1573 k = find_token(document.body, "\trotateAngle", i + 1, j)
1575 del document.body[k]
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    # NOTE(review): the `i = 0` initialisation and `i = i + 1` counter were
    # dropped from this listing; restored.
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # delete the "|"
            # NOTE(review): the slice below also drops the line's LAST
            # character (`...-1`); kept as in the listing — confirm against
            # the original whether that is intentional.
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
        i = i + 1
def revert_tableborder(document):
    # Re-insert the "|" in front of ">{" on lines that carry leftline="true"
    # (inverse of convert_tableborder).
    # NOTE(review): the `i = 0` initialisation and `i = i + 1` counter were
    # dropped from this listing; restored.
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # add the "|"
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        i = i + 1
def revert_armenian(document):
    " Revert Armenian support: armscii8 -> auto, add armtex, armenian -> english. "
    # NOTE(review): the guard lines (`if i != -1:`, `if k == -1:`, `else:`)
    # and the loop counters were dropped from this listing; restored to
    # match the gap positions — verify against the original file.
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        if i != -1:
            document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    i = 0
    k = -1
    while i < len(document.preamble):
        if k == -1:
            k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        if k == -1:
            k = document.preamble[i].find("%", 0, len(document.preamble[i]))
        i = i + 1
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        if k != -1:
            document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        else:
            document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): `if i == -1:`/`else:`/`if i != -1:` guards restored
    # (dropped from this listing), matching the sibling revert_* functions.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    r" Revert preamble option \listings_params "
    i = find_token(document.header, "\\listings_params", 0)
    # NOTE(review): the `if i != -1:` guard was dropped from this listing;
    # restored (without it a missing header line would crash below).
    if i != -1:
        document.preamble.append('\\usepackage{listings}')
        # NOTE(review): split()[1] keeps only the first whitespace-separated
        # token of the quoted parameter string — parameters containing
        # spaces would be truncated; kept as in the listing, verify upstream.
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
# Purpose: rewrite a listings inset as raw ERT — \lstinline{...} for inline
# insets, a \begin{lstlisting}...\end{lstlisting} environment otherwise —
# folding an embedded Caption inset (and its label) into the listings
# parameters.
# NOTE(review): this listing dropped a large number of lines: most of the
# docstring example, the search loop and its guards, the initialisation of
# `params`/`status`/`inline`/`caption`/`label`, the computation of `k`
# (used at internal line 1809), and most entries of the ERT boilerplate
# lists. The surviving lines are kept byte-identical and are NOT
# consecutive — do not edit the splice logic without the original file.
1669 def revert_listings_inset(document):
1670 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1674 lstparams "language=Delphi"
1678 \begin_layout Standard
1688 \begin_layout Standard
1692 lstinline[language=Delphi]{var i = 10;}
1697 There can be an caption inset in this inset
1699 \begin_layout Standard
1700 \begin_inset Caption
1702 \begin_layout Standard
1704 \begin_inset LatexCommand label
1720 i = find_token(document.body, '\\begin_inset listings', i)
# Listings requires the LaTeX package; add it once.
1724 if not '\\usepackage{listings}' in document.preamble:
1725 document.preamble.append('\\usepackage{listings}')
1726 j = find_end_of_inset(document.body, i + 1)
1728 # this should not happen
# Read the inset's three option lines (inline/lstparams/status).
1734 for line in range(i + 1, i + 4):
1735 if document.body[line].startswith('inline'):
1736 inline = document.body[line].split()[1]
1737 if document.body[line].startswith('lstparams'):
1738 params = document.body[line].split()[1].strip('"')
1739 if document.body[line].startswith('status'):
1740 status = document.body[line].split()[1].strip()
# Optional caption inset with an optional label inside it.
1745 cap = find_token(document.body, '\\begin_inset Caption', i)
1747 cap_end = find_end_of_inset(document.body, cap + 1)
1749 # this should not happen
1752 lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1754 lbl_end = find_end_of_inset(document.body, lbl + 1)
1756 # this should not happen
1761 for line in document.body[lbl : lbl_end + 1]:
1762 if line.startswith('name '):
1763 label = line.split()[1].strip('"')
# Caption text = caption-inset content minus the label inset.
1765 for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1766 if not line.startswith('\\'):
1767 caption += line.strip()
1770 # looking for the oneline code for lstinline
1771 inlinecode = document.body[find_end_of_layout(document.body,
1772 find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
# Fold caption/label into the [params] option string.
1773 if len(caption) > 0:
1774 if len(params) == 0:
1775 params = 'caption={%s}' % caption
1777 params += ',caption={%s}' % caption
1779 if len(params) == 0:
1780 params = 'label={%s}' % label
1782 params += ',label={%s}' % label
1784 params = '[%s]' % params
# Backslashes inside ERT must be written as \backslash on its own line.
1785 params = params.replace('\\', '\\backslash\n')
1786 if inline == 'true':
1787 document.body[i:(j+1)] = [r'\begin_inset ERT',
1788 'status %s' % status,
1789 r'\begin_layout %s' % document.default_layout,
1793 'lstinline%s{%s}' % (params, inlinecode),
1798 document.body[i: j+1] = [r'\begin_inset ERT',
1799 'status %s' % status,
1801 r'\begin_layout %s' % document.default_layout,
1805 r'begin{lstlisting}%s' % params,
1808 r'\begin_layout %s' % document.default_layout,
1809 ] + document.body[k : j - 1] + \
1811 r'\begin_layout %s' % document.default_layout,
# Purpose: rewrite an Include inset carrying \lstinputlisting{file}[opt]
# as an equivalent ERT inset.
# NOTE(review): this listing dropped the search loop, its guards, and most
# of the ERT boilerplate list entries (internal lines 1860-1871); the
# surviving lines are kept byte-identical and are NOT consecutive.
1820 def revert_include_listings(document):
1821 r''' Revert lstinputlisting Include option , translate
1822 \begin_inset Include \lstinputlisting{file}[opt]
1832 \begin_layout Standard
1836 lstinputlisting{file}[opt]
1844 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
# Listings requires the LaTeX package; add it once.
1848 if not '\\usepackage{listings}' in document.preamble:
1849 document.preamble.append('\\usepackage{listings}')
1850 j = find_end_of_inset(document.body, i + 1)
1852 # this should not happen
1854 # find command line lstinputlisting{file}[options]
1855 cmd, file, option = '', '', ''
1856 if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1857 cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
# Backslashes inside ERT must be written as \backslash on its own line.
1858 option = option.replace('\\', '\\backslash\n')
1859 document.body[i : j + 1] = [r'\begin_inset ERT',
1862 r'\begin_layout %s' % document.default_layout,
1866 '%s%s{%s}' % (cmd, option, file),
def revert_ext_font_sizes(document):
    " Move an extsizes font size from \\paperfontsize into the class options. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    # NOTE(review): the `fontsize += 'pt'` line was dropped from this
    # listing; restored (the options below are of the form '10pt').
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # No \options line yet: insert one right after \textclass.
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    " Move an extsizes font-size class option into \\paperfontsize. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    # NOTE(review): this guard and the option-removal lines below were
    # dropped from this listing; restored to match the surviving lines —
    # verify against the original file.
    if i == -1: return

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            break
    else: # this else will only be attained if the for cycle had no match
        return

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # Strip the trailing 'pt' to get the bare size for the header.
            fontsize = opt[:-2]
            del options[j]
            break

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    if options:
        document.header[i] = '\\options %s' % ','.join(options)
    else:
        del document.header[i]
# Purpose: replace a --Separator-- layout by a standard layout containing a
# LyX note that reads "Separate Environment", preserving the paragraph's
# content after the note.
# NOTE(review): this listing dropped the search loop, its guards, and
# several entries of the replacement list (note status line, \end_layout /
# \end_inset markers); the surviving lines are kept byte-identical and are
# NOT consecutive.
1928 def revert_separator_layout(document):
1929 r'''Revert --Separator-- to a lyx note
1932 \begin_layout --Separator--
1938 \begin_layout Standard
1939 \begin_inset Note Note
1942 \begin_layout Standard
1955 i = find_token(document.body, r'\begin_layout --Separator--', i)
1958 j = find_end_of_layout(document.body, i + 1)
1960 # this should not happen
# Splice in the note, then re-append the original paragraph content.
1962 document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1963 r'\begin_inset Note Note',
1966 r'\begin_layout %s' % document.default_layout,
1967 'Separate Environment',
1971 document.body[ i + 1 : j] + \
def convert_arabic(document):
    " Rename the arabic language (and \\lang marks) to arabic_arabtex. "
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        # NOTE(review): this guard, `i = 0`, `if h != -1:` and the loop
        # counter were dropped from this listing; restored.
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            # NOTE(review): the whole line is replaced, so any text after
            # the \lang token on the same line is dropped — kept as in the
            # listing; verify upstream.
            document.body[i] = '\lang arabic_arabtex'
        i = i + 1
def revert_arabic(document):
    " Rename the arabic_arabtex language (and \\lang marks) back to arabic. "
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        # NOTE(review): this guard, `i = 0`, `if h != -1:` and the loop
        # counter were dropped from this listing; restored.
        if i != -1:
            document.header[i] = "\\language arabic"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            # NOTE(review): the whole line is replaced (text after the
            # \lang token on the same line is dropped) — kept as in the
            # listing; verify upstream.
            document.body[i] = '\lang arabic'
        i = i + 1
# Conversion tables: each entry is [target_format, [converter functions]].
# `convert` upgrades step by step to format 276; `revert` downgrades back
# to 245.
# NOTE(review): this listing dropped many table entries (internal lines
# 2014, 2016-2017, 2019, 2021-2022, 2026, 2028, 2032-2036, 2038-2039,
# 2043-2045 — including the closing bracket of `convert` and the
# `revert = [` opener — 2048, 2061, 2063-2064, 2072); the converter
# functions they referenced are not recoverable from this chunk. The
# surviving lines are kept byte-identical and are NOT consecutive.
2011 supported_versions = ["1.5.0","1.5"]
2012 convert = [[246, []],
2013 [247, [convert_font_settings]],
2015 [249, [convert_utf8]],
2018 [252, [convert_commandparams, convert_bibitem]],
2020 [254, [convert_esint]],
2023 [257, [convert_caption]],
2024 [258, [convert_lyxline]],
2025 [259, [convert_accent, normalize_font_whitespace_259]],
2027 [261, [convert_changes]],
2029 [263, [normalize_language_name]],
2030 [264, [convert_cv_textclass]],
2031 [265, [convert_tableborder]],
2037 [271, [convert_ext_font_sizes]],
2040 [274, [normalize_font_whitespace_274]],
2041 [275, [convert_graphics_rotation]],
2042 [276, [convert_arabic]]
2046 [275, [revert_arabic]],
2047 [274, [revert_graphics_rotation]],
2049 [272, [revert_separator_layout]],
2050 [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2051 [270, [revert_ext_font_sizes]],
2052 [269, [revert_beamer_alert, revert_beamer_structure]],
2053 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2054 [267, [revert_CJK]],
2055 [266, [revert_utf8plain]],
2056 [265, [revert_armenian]],
2057 [264, [revert_tableborder]],
2058 [263, [revert_cv_textclass]],
2059 [262, [revert_language_name]],
2060 [261, [revert_ascii]],
2062 [259, [revert_utf8x]],
2065 [256, [revert_caption]],
2066 [255, [revert_encodings]],
2067 [254, [revert_clearpage, revert_cleardoublepage]],
2068 [253, [revert_esint]],
2069 [252, [revert_nomenclature, revert_printnomenclature]],
2070 [251, [revert_commandparams]],
2071 [250, [revert_cs_label]],
2073 [248, [revert_accent, revert_utf8, revert_unicode]],
2074 [247, [revert_booktabs]],
2075 [246, [revert_font_settings]],
2076 [245, [revert_framed]]]
2079 if __name__ == "__main__":