1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
25 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
26 from LyX import get_encoding
29 ####################################################################
30 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the \\end_inset matching the inset open at lines[i]."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    """Return the index of the \\end_layout matching the layout open at lines[i]."""
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def find_beginning_of_layout(lines, i):
    """Return the index of the \\begin_layout of the layout containing lines[i]."""
    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
44 # End of helper functions
45 ####################################################################
49 # Notes: Framed/Shaded
52 def revert_framed(document):
53 "Revert framed notes. "
# Downgrade "Note Framed" and "Note Shaded" insets to the plain "Note"
# inset understood by the older format; the frame/shading flavour is lost.
56 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
# NOTE(review): original lines 54-55 and 57-59 (loop setup / -1 check /
# increment, presumably) are absent from this listing -- confirm upstream.
60 document.body[i] = "\\begin_inset Note"
# Maps from the pre-1.5 '\fontscheme' value to the new per-family
# '\font_roman' / '\font_sans' / '\font_typewriter' header values.
# NOTE(review): original lines 72 and 77 (the closing entries of
# roman_fonts and sans_fonts, presumably 'pslatex') are absent from
# this listing; only typewriter_fonts is shown complete.
68 roman_fonts = {'default' : 'default', 'ae' : 'ae',
69 'times' : 'times', 'palatino' : 'palatino',
70 'helvet' : 'default', 'avant' : 'default',
71 'newcent' : 'newcent', 'bookman' : 'bookman',
73 sans_fonts = {'default' : 'default', 'ae' : 'default',
74 'times' : 'default', 'palatino' : 'default',
75 'helvet' : 'helvet', 'avant' : 'avant',
76 'newcent' : 'default', 'bookman' : 'default',
78 typewriter_fonts = {'default' : 'default', 'ae' : 'default',
79 'times' : 'default', 'palatino' : 'default',
80 'helvet' : 'default', 'avant' : 'default',
81 'newcent' : 'default', 'bookman' : 'default',
82 'pslatex' : 'courier'}
84 def convert_font_settings(document):
85 " Convert font settings. "
# Replace the single '\fontscheme' header line with the new per-family
# '\font_*' lines, looked up in the roman/sans/typewriter maps above.
87 i = find_token_exact(document.header, "\\fontscheme", i)
89 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
91 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
93 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
# Unknown or empty schemes fall back to 'default' with a warning.
94 font_scheme = 'default'
95 if not font_scheme in roman_fonts.keys():
96 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
97 font_scheme = 'default'
# Splice the new header lines in place of the old '\fontscheme' line.
98 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
99 '\\font_sans %s' % sans_fonts[font_scheme],
100 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
101 '\\font_default_family default',
# NOTE(review): original lines 102-103 (presumably '\font_sc false' and
# '\font_osf false') are absent from this listing.
104 '\\font_sf_scale 100',
105 '\\font_tt_scale 100']
108 def revert_font_settings(document):
109 " Revert font settings. "
# Inverse of convert_font_settings: read and delete all new '\font_*'
# header lines, then reinsert a single '\fontscheme' line.  Settings that
# have no pre-1.5 equivalent are emulated with preamble code or warned about.
112 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
113 for family in 'roman', 'sans', 'typewriter':
114 name = '\\font_%s' % family
115 i = find_token_exact(document.header, name, i)
117 document.warning("Malformed LyX document: Missing `%s'." % name)
# insert_line remembers where the deleted lines were, so the reverted
# '\fontscheme' goes back to the same spot in the header.
120 if (insert_line < 0):
122 fonts[family] = get_value(document.header, name, i, i + 1)
123 del document.header[i]
124 i = find_token_exact(document.header, '\\font_default_family', i)
126 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
127 font_default_family = 'default'
129 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
130 del document.header[i]
131 i = find_token_exact(document.header, '\\font_sc', i)
133 document.warning("Malformed LyX document: Missing `\\font_sc'.")
136 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
137 del document.header[i]
# Small caps, OSF and scale settings cannot be represented in the old
# format; non-default values only produce warnings below.
138 if font_sc != 'false':
139 document.warning("Conversion of '\\font_sc' not yet implemented.")
140 i = find_token_exact(document.header, '\\font_osf', i)
142 document.warning("Malformed LyX document: Missing `\\font_osf'.")
145 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
146 del document.header[i]
147 i = find_token_exact(document.header, '\\font_sf_scale', i)
149 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
150 font_sf_scale = '100'
152 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
153 del document.header[i]
154 if font_sf_scale != '100':
155 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
156 i = find_token_exact(document.header, '\\font_tt_scale', i)
158 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
159 font_tt_scale = '100'
161 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
162 del document.header[i]
163 if font_tt_scale != '100':
164 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# First try to find a named scheme whose three family entries match
# exactly; if one matches, emit it and stop.
165 for font_scheme in roman_fonts.keys():
166 if (roman_fonts[font_scheme] == fonts['roman'] and
167 sans_fonts[font_scheme] == fonts['sans'] and
168 typewriter_fonts[font_scheme] == fonts['typewriter']):
169 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
170 if font_default_family != 'default':
171 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
172 if font_osf == 'true':
173 document.warning("Ignoring `\\font_osf = true'")
# Fallback: no named scheme matches -- write 'default' and reproduce the
# individual font choices with raw LaTeX preamble commands instead.
175 font_scheme = 'default'
176 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
177 if fonts['roman'] == 'cmr':
178 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
179 if font_osf == 'true':
180 document.preamble.append('\\usepackage{eco}')
182 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
183 if fonts['roman'] == font:
184 document.preamble.append('\\usepackage{%s}' % font)
185 for font in 'cmss', 'lmss', 'cmbr':
186 if fonts['sans'] == font:
187 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
188 for font in 'berasans':
189 if fonts['sans'] == font:
190 document.preamble.append('\\usepackage{%s}' % font)
191 for font in 'cmtt', 'lmtt', 'cmtl':
192 if fonts['typewriter'] == font:
193 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
194 for font in 'courier', 'beramono', 'luximono':
195 if fonts['typewriter'] == font:
196 document.preamble.append('\\usepackage{%s}' % font)
197 if font_default_family != 'default':
198 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
199 if font_osf == 'true':
200 document.warning("Ignoring `\\font_osf = true'")
203 def revert_booktabs(document):
204 " We remove the booktabs flag or everything else will become a mess. "
# Strip the booktabs attribute and the extra row-spacing attributes
# (topspace/bottomspace/interlinespace) from every Tabular inset, since
# the older format does not know them.
205 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
206 re_tspace = re.compile(r'\s+topspace="[^"]+"')
207 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
208 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
211 i = find_token(document.body, "\\begin_inset Tabular", i)
214 j = find_end_of_inset(document.body, i + 1)
216 document.warning("Malformed LyX document: Could not find end of tabular.")
# Scan every line of the inset [i, j) and rewrite matching lines in place.
218 for k in range(i, j):
219 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
220 document.warning("Converting 'booktabs' table to normal table.")
221 document.body[k] = document.body[k].replace(' booktabs="true"', '')
222 if re.search(re_row, document.body[k]):
223 document.warning("Removing extra row space.")
224 document.body[k] = re_tspace.sub('', document.body[k])
225 document.body[k] = re_bspace.sub('', document.body[k])
226 document.body[k] = re_ispace.sub('', document.body[k])
230 def convert_multiencoding(document, forward):
231 """ Fix files with multiple encodings.
232 Files with an inputencoding of "auto" or "default" and multiple languages
233 where at least two languages have different default encodings are encoded
234 in multiple encodings for file formats < 249. These files are incorrectly
235 read and written (as if the whole file was in the encoding of the main
237 This is not true for files written by CJK-LyX, they are always in the locale
241 - converts from fake unicode values to true unicode if forward is true, and
242 - converts from true unicode values to fake unicode if forward is false.
243 document.encoding must be set to the old value (format 248) in both cases.
245 We do this here and not in LyX.py because it is far easier to do the
246 necessary parsing in modern formats than in ancient ones.
# CJK-LyX files are already consistently encoded, so nothing to do.
248 if document.cjk_encoding != '':
# The stack tracks the encoding in effect per nesting level; a new level
# is pushed at each \begin_layout and popped at each \end_layout.
250 encoding_stack = [document.encoding]
251 lang_re = re.compile(r"^\\lang\s(\S+)")
252 if document.inputencoding == "auto" or document.inputencoding == "default":
253 for i in range(len(document.body)):
254 result = lang_re.match(document.body[i])
256 language = result.group(1)
257 if language == "default":
258 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
259 encoding_stack[-1] = document.encoding
# lang[language][3] is the language's default encoding in the
# lyx2lyx_lang table.
261 from lyx2lyx_lang import lang
262 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
263 encoding_stack[-1] = lang[language][3]
264 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
265 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
266 encoding_stack.append(encoding_stack[-1])
267 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
268 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
269 if len(encoding_stack) == 1:
270 # Don't remove the document encoding from the stack
271 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
273 del encoding_stack[-1]
# Only lines whose effective encoding differs from the document
# encoding need re-coding.
274 if encoding_stack[-1] != document.encoding:
276 # This line has been incorrectly interpreted as if it was
277 # encoded in 'encoding'.
278 # Convert back to the 8bit string that was in the file.
279 orig = document.body[i].encode(document.encoding)
280 # Convert the 8bit string that was in the file to unicode
281 # with the correct encoding.
282 document.body[i] = orig.decode(encoding_stack[-1])
284 # Convert unicode to the 8bit string that will be written
285 # to the file with the correct encoding.
286 orig = document.body[i].encode(encoding_stack[-1])
287 # Convert the 8bit string that will be written to the
288 # file to fake unicode with the encoding that will later
289 # be used when writing to the file.
290 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document over to UTF-8.

    First re-decodes the body from its per-language fake encodings into
    true unicode, then records 'utf8' as the document encoding.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
299 def revert_utf8(document):
300 " Set document encoding to the value corresponding to inputencoding. "
# Rewrite a 'utf8' inputencoding back to 'auto' (or add one if missing),
# derive the real 8-bit encoding for format 248, then re-encode the body.
301 i = find_token(document.header, "\\inputencoding", 0)
303 document.header.append("\\inputencoding auto")
304 elif get_value(document.header, "\\inputencoding", i) == "utf8":
305 document.header[i] = "\\inputencoding auto"
306 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
307 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
# forward=False: convert true unicode back to fake per-language unicode.
308 convert_multiencoding(document, False)
311 def revert_cs_label(document):
312 " Remove status flag of charstyle label. "
# The 'show_label' status line inside CharStyle insets does not exist in
# the older format, so it is dropped.
315 i = find_token(document.body, "\\begin_inset CharStyle", i)
318 # Seach for a line starting 'show_label'
319 # If it is not there, break with a warning message
322 if (document.body[i][:10] == "show_label"):
# Hitting \begin_layout first means the inset had no show_label line.
325 elif (document.body[i][:13] == "\\begin_layout"):
326 document.warning("Malformed LyX document: Missing 'show_label'.")
333 def convert_bibitem(document):
# Convert a raw \bibitem command in the body:
335 \bibitem [option]{argument}
# into a LatexCommand inset with 'label'/'key' parameters:
339 \begin_inset LatexCommand bibitem
345 This must be called after convert_commandparams.
349 i = find_token(document.body, "\\bibitem", i)
# Locate the optional [..] part; find('[')+1 == 0 means no option.
352 j = document.body[i].find('[') + 1
353 k = document.body[i].rfind(']')
354 if j == 0: # No optional argument found
357 option = document.body[i][j:k]
# The mandatory {..} argument is taken from the last brace pair.
358 j = document.body[i].rfind('{') + 1
359 k = document.body[i].rfind('}')
360 argument = document.body[i][j:k]
361 lines = ['\\begin_inset LatexCommand bibitem']
# Quotes inside values are escaped for the inset parameter syntax.
363 lines.append('label "%s"' % option.replace('"', '\\"'))
364 lines.append('key "%s"' % argument.replace('"', '\\"'))
366 lines.append('\\end_inset')
367 document.body[i:i+1] = lines
# command : [option1, option2, argument]
# Parameter names used by each LatexCommand inset; an empty string means
# the command has no parameter in that slot.
commandparams_info = {}

# Every citation variant shares the same three parameter names.
for _cite in ("cite", "citet", "citep", "citealt", "citealp",
              "citeauthor", "citeyear", "citeyearpar",
              "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
              "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
              "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
              "citefield", "citetitle", "cite*"):
    commandparams_info[_cite] = ["after", "before", "key"]

# Cross-reference commands take an optional name and a reference argument.
for _ref in ("eqref", "pageref", "prettyref", "ref", "vpageref", "vref"):
    commandparams_info[_ref] = ["name", "", "reference"]

# The remaining commands each have their own layout.
commandparams_info.update({
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"],
})
416 def convert_commandparams(document):
# Convert the inline form
419 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
# into the parameterized form
424 \begin_inset LatexCommand cmdname
# followed by named 'name "value"' parameter lines, where
430 name1, name2 and name3 can be different for each command.
432 # \begin_inset LatexCommand bibitem was not the official version (see
433 # convert_bibitem()), but could be read in, so we convert it here, too.
437 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# Everything after the 26-char prefix is the raw \cmd[..][..]{..} text.
440 command = document.body[i][26:].strip()
442 document.warning("Malformed LyX document: Missing LatexCommand name.")
446 # The following parser is taken from the original InsetCommandParams::scanCommand
# State machine over the characters of 'command'; states seen below are
# CMDNAME, OPTION, SECOPTION and CONTENT, with nestdepth tracking
452 # Used to handle things like \command[foo[bar]]{foo{bar}}
456 if ((state == "CMDNAME" and c == ' ') or
457 (state == "CMDNAME" and c == '[') or
458 (state == "CMDNAME" and c == '{')):
460 if ((state == "OPTION" and c == ']') or
461 (state == "SECOPTION" and c == ']') or
462 (state == "CONTENT" and c == '}')):
466 nestdepth = nestdepth - 1
467 if ((state == "OPTION" and c == '[') or
468 (state == "SECOPTION" and c == '[') or
469 (state == "CONTENT" and c == '{')):
470 nestdepth = nestdepth + 1
# Accumulate the current character into the buffer for the active state.
471 if state == "CMDNAME":
473 elif state == "OPTION":
475 elif state == "SECOPTION":
477 elif state == "CONTENT":
# b is the previous delimiter: '[' after ']' starts the second option.
482 elif c == '[' and b != ']':
484 nestdepth = 0 # Just to be sure
485 elif c == '[' and b == ']':
487 nestdepth = 0 # Just to be sure
490 nestdepth = 0 # Just to be sure
493 # Now we have parsed the command, output the parameters
494 lines = ["\\begin_inset LatexCommand %s" % name]
# Parameters the command does not declare in commandparams_info are
# dropped with a warning instead of being written out.
496 if commandparams_info[name][0] == "":
497 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
499 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
501 if commandparams_info[name][1] == "":
502 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
504 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
506 if commandparams_info[name][2] == "":
507 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
509 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
510 document.body[i:i+1] = lines
514 def revert_commandparams(document):
# Inverse of convert_commandparams: collect the named parameter lines of
# each LatexCommand inset and rebuild the inline \cmd[opt1][opt2]{arg} form.
515 regex = re.compile(r'(\S+)\s+(.+)')
518 i = find_token(document.body, "\\begin_inset LatexCommand", i)
521 name = document.body[i].split()[2]
522 j = find_end_of_inset(document.body, i + 1)
# Scan the inset body and sort each 'pname "pvalue"' line into the
# option1/option2/argument slot declared for this command.
527 for k in range(i + 1, j):
528 match = re.match(regex, document.body[k])
530 pname = match.group(1)
531 pvalue = match.group(2)
532 if pname == "preview":
533 preview_line = document.body[k]
534 elif (commandparams_info[name][0] != "" and
535 pname == commandparams_info[name][0]):
# Unescape the quoting applied by convert_commandparams.
536 option1 = pvalue.strip('"').replace('\\"', '"')
537 elif (commandparams_info[name][1] != "" and
538 pname == commandparams_info[name][1]):
539 option2 = pvalue.strip('"').replace('\\"', '"')
540 elif (commandparams_info[name][2] != "" and
541 pname == commandparams_info[name][2]):
542 argument = pvalue.strip('"').replace('\\"', '"')
543 elif document.body[k].strip() != "":
544 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
# bibitem reverts to a raw \bibitem command rather than an inset.
545 if name == "bibitem":
547 lines = ["\\bibitem {%s}" % argument]
549 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
# All other commands keep the inset but inline the parameters; the four
# cases cover the presence/absence of each optional argument.
553 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
555 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
558 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
560 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
561 if name != "bibitem":
562 if preview_line != "":
563 lines.append(preview_line)
565 lines.append('\\end_inset')
566 document.body[i:j+1] = lines
570 def revert_nomenclature(document):
571 " Convert nomenclature entry to ERT. "
# The nomenclature inset does not exist in the old format, so each one is
# rewritten as an ERT inset containing the raw \nomenclature command.
572 regex = re.compile(r'(\S+)\s+(.+)')
576 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
580 j = find_end_of_inset(document.body, i + 1)
# Pull symbol/description/prefix out of the inset's parameter lines.
585 for k in range(i + 1, j):
586 match = re.match(regex, document.body[k])
588 name = match.group(1)
589 value = match.group(2)
590 if name == "preview":
591 preview_line = document.body[k]
592 elif name == "symbol":
593 symbol = value.strip('"').replace('\\"', '"')
594 elif name == "description":
595 description = value.strip('"').replace('\\"', '"')
596 elif name == "prefix":
597 prefix = value.strip('"').replace('\\"', '"')
598 elif document.body[k].strip() != "":
599 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
# The prefix becomes the optional argument when present.
601 command = 'nomenclature{%s}{%s}' % (symbol, description)
603 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
604 document.body[i:j+1] = ['\\begin_inset ERT',
607 '\\begin_layout %s' % document.default_layout,
# Add the nomencl package to the preamble once, if any entry was found.
616 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
617 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
618 document.preamble.append('\\makenomenclature')
621 def revert_printnomenclature(document):
622 " Convert printnomenclature to ERT. "
# Same scheme as revert_nomenclature, but for the \printnomenclature
# inset; only the labelwidth parameter is honoured.
623 regex = re.compile(r'(\S+)\s+(.+)')
627 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
631 j = find_end_of_inset(document.body, i + 1)
634 for k in range(i + 1, j):
635 match = re.match(regex, document.body[k])
637 name = match.group(1)
638 value = match.group(2)
639 if name == "preview":
640 preview_line = document.body[k]
641 elif name == "labelwidth":
642 labelwidth = value.strip('"').replace('\\"', '"')
643 elif document.body[k].strip() != "":
644 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
# Without a labelwidth, emit an empty mandatory argument.
646 command = 'nomenclature{}'
648 command = 'nomenclature[%s]' % labelwidth
649 document.body[i:j+1] = ['\\begin_inset ERT',
652 '\\begin_layout %s' % document.default_layout,
# The nomencl package and \makenomenclature are added to the preamble
# once, as in revert_nomenclature.
661 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
662 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
663 document.preamble.append('\\makenomenclature')
666 def convert_esint(document):
667 " Add \\use_esint setting to header. "
# The new header line is inserted right before '\cite_engine', which is
# used here only as a stable anchor position.
668 i = find_token(document.header, "\\cite_engine", 0)
670 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
672 # 0 is off, 1 is auto, 2 is on.
673 document.header.insert(i, '\\use_esint 0')
676 def revert_esint(document):
677 " Remove \\use_esint setting from header. "
678 i = find_token(document.header, "\\use_esint", 0)
680 document.warning("Malformed LyX document: Missing `\\use_esint'.")
682 use_esint = document.header[i].split()[1]
683 del document.header[i]
684 # 0 is off, 1 is auto, 2 is on.
# When esint was enabled, load the package explicitly in the preamble so
# the reverted document still compiles the same way.
686 document.preamble.append('\\usepackage{esint}')
689 def revert_clearpage(document):
# Replace each \clearpage body line with an ERT inset containing the raw
# LaTeX command, since the old format has no native clearpage.
693 i = find_token(document.body, "\\clearpage", i)
696 document.body[i:i+1] = ['\\begin_inset ERT',
699 '\\begin_layout %s' % document.default_layout,
710 def revert_cleardoublepage(document):
711 " cleardoublepage -> ERT "
# Same ERT wrapping as revert_clearpage, for \cleardoublepage.
714 i = find_token(document.body, "\\cleardoublepage", i)
717 document.body[i:i+1] = ['\\begin_inset ERT',
720 '\\begin_layout %s' % document.default_layout,
731 def convert_lyxline(document):
732 " remove fontsize commands for \lyxline "
733 # The problematic is: The old \lyxline definition doesn't handle the fontsize
734 # to change the line thickness. The new definiton does this so that imported
735 # \lyxlines would have a different line thickness. The eventual fontsize command
736 # before \lyxline is therefore removed to get the same output.
737 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
738 "large", "Large", "LARGE", "huge", "Huge"]
# For each font size, remove any '\size <size>' line that sits exactly
# two lines before a '\lyxline'.
739 for n in range(0, len(fontsizes)):
742 while i < len(document.body):
743 i = find_token(document.body, "\\size " + fontsizes[n], i)
744 k = find_token(document.body, "\\lyxline", i)
745 # the corresponding fontsize command is always 2 lines before the \lyxline
746 if (i != -1 and k == i+2):
747 document.body[i:i+1] = []
753 def revert_encodings(document):
754 " Set new encodings to auto. "
# These inputencoding values were introduced after format 248; revert
# them to 'auto' and recompute document.inputencoding accordingly.
755 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
756 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
757 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
758 i = find_token(document.header, "\\inputencoding", 0)
760 document.header.append("\\inputencoding auto")
762 inputenc = get_value(document.header, "\\inputencoding", i)
763 if inputenc in encodings:
764 document.header[i] = "\\inputencoding auto"
765 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
768 def convert_caption(document):
769 " Convert caption layouts to caption insets. "
# Each '\begin_layout Caption' paragraph is wrapped into a Caption inset
# living inside a default-layout paragraph.
772 i = find_token(document.body, "\\begin_layout Caption", i)
775 j = find_end_of_layout(document.body, i)
777 document.warning("Malformed LyX document: Missing `\\end_layout'.")
# Close the inner layout and the new inset at the old layout's end...
780 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
# ...and open the wrapper layout plus the Caption inset at its start.
781 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
782 "\\begin_inset Caption", "",
783 "\\begin_layout %s" % document.default_layout]
787 def revert_caption(document):
788 " Convert caption insets to caption layouts. "
789 " This assumes that the text class has a caption style. "
792 i = find_token(document.body, "\\begin_inset Caption", i)
796 # We either need to delete the previous \begin_layout line, or we
797 # need to end the previous layout if this inset is not in the first
798 # position of the paragraph.
799 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
800 if layout_before == -1:
801 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
# layout_line is saved so the surrounding layout can be restarted after
# the inset if text follows it in the same paragraph.
803 layout_line = document.body[layout_before]
804 del_layout_before = True
805 l = layout_before + 1
# Any non-blank line between the layout start and the inset means the
# inset is not first in the paragraph, so the layout must be kept.
807 if document.body[l] != "":
808 del_layout_before = False
811 if del_layout_before:
812 del document.body[layout_before:i]
815 document.body[i:i] = ["\\end_layout", ""]
818 # Find start of layout in the inset and end of inset
819 j = find_token(document.body, "\\begin_layout", i)
821 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
823 k = find_end_of_inset(document.body, i)
825 document.warning("Malformed LyX document: Missing `\\end_inset'.")
828 # We either need to delete the following \end_layout line, or we need
829 # to restart the old layout if this inset is not at the paragraph end.
830 layout_after = find_token(document.body, "\\end_layout", k)
831 if layout_after == -1:
832 document.warning("Malformed LyX document: Missing `\\end_layout'.")
834 del_layout_after = True
# Mirror check on the trailing side: non-blank content after the inset
# means the old layout must be restarted instead of deleted.
836 while l < layout_after:
837 if document.body[l] != "":
838 del_layout_after = False
842 del document.body[k+1:layout_after+1]
844 document.body[k+1:k+1] = [layout_line, ""]
846 # delete \begin_layout and \end_inset and replace \begin_inset with
847 # "\begin_layout Caption". This works because we can only have one
848 # paragraph in the caption inset: The old \end_layout will be recycled.
850 if document.body[k] == "":
853 if document.body[j] == "":
855 document.body[i] = "\\begin_layout Caption"
856 if document.body[i+1] == "":
857 del document.body[i+1]
861 # Accents of InsetLaTeXAccent
# Maps the one-character LaTeX accent code to the corresponding Unicode
# combining character.
# NOTE(review): original line 862 ('accent_map = {', presumably) is absent
# from this listing, as are the closing braces of the maps below.
863 "`" : u'\u0300', # grave
864 "'" : u'\u0301', # acute
865 "^" : u'\u0302', # circumflex
866 "~" : u'\u0303', # tilde
867 "=" : u'\u0304', # macron
868 "u" : u'\u0306', # breve
869 "." : u'\u0307', # dot above
870 "\"": u'\u0308', # diaresis
871 "r" : u'\u030a', # ring above
872 "H" : u'\u030b', # double acute
873 "v" : u'\u030c', # caron
874 "b" : u'\u0320', # minus sign below
875 "d" : u'\u0323', # dot below
876 "c" : u'\u0327', # cedilla
877 "k" : u'\u0328', # ogonek
878 "t" : u'\u0361' # tie. This is special: It spans two characters, but
879 # only one is given as argument, so we don't need to
880 # treat it differently.
884 # special accents of InsetLaTeXAccent without argument
885 special_accent_map = {
886 'i' : u'\u0131', # dotless i
887 'j' : u'\u0237', # dotless j
888 'l' : u'\u0142', # l with stroke
889 'L' : u'\u0141' # L with stroke
893 # special accent arguments of InsetLaTeXAccent
# Arguments that are themselves commands, mapped to the character they
# produce so the accent can be composed with it.
895 '\\i' : u'\u0131', # dotless i
896 '\\j' : u'\u0237' # dotless j
900 def _convert_accent(accent, accented_char):
# Compose an InsetLaTeXAccent (accent code + accented character) into a
# single precomposed Unicode string, or fail for unconvertible input.
# NOTE(review): original lines 901-903 are absent from this listing --
# presumably they bind the locals 'type' and 'char' used below; confirm.
904 if type in special_accent_map:
905 return special_accent_map[type]
906 # a missing char is treated as space by LyX
908 elif type == 'q' and char in ['t', 'd', 'l', 'L']:
909 # Special caron, only used with t, d, l and L.
910 # It is not in the map because we convert it to the same unicode
911 # character as the normal caron: \q{} is only defined if babel with
912 # the czech or slovak language is used, and the normal caron
913 # produces the correct output if the T1 font encoding is used.
914 # For the same reason we never convert to \q{} in the other direction.
916 elif char in accented_map:
917 char = accented_map[char]
918 elif (len(char) > 1):
919 # We can only convert accents on a single char
# NFKC composes the base char + combining accent into one code point.
921 a = accent_map.get(type)
923 return unicodedata.normalize("NFKC", "%s%s" % (char, a))
927 def convert_ertbackslash(body, i, ert, default_layout):
928 r""" -------------------------------------------------------------------------------------------
929 Convert backslashes and '\n' into valid ERT code, append the converted
930 text to body[i] and return the (maybe incremented) line index i"""
# Backslashes become the literal '\backslash ' token...
934 body[i] = body[i] + '\\backslash '
# ...and newlines become a paragraph break inside the ERT inset.
938 body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
# Any other character is appended verbatim.
941 body[i] = body[i] + c
945 def convert_accent(document):
946 # The following forms are supported by LyX:
947 # '\i \"{a}' (standard form, as written by LyX)
948 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
949 # '\i \"{ }' (also accepted if the accented char is a space)
950 # '\i \" a' (also accepted)
951 # '\i \"' (also accepted)
952 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
953 re_contents = re.compile(r'^([^\s{]+)(.*)$')
954 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
957 i = find_re(document.body, re_wholeinset, i)
960 match = re_wholeinset.match(document.body[i])
961 prefix = match.group(1)
962 contents = match.group(3).strip()
963 match = re_contents.match(contents)
965 # Strip first char (always \)
966 accent = match.group(1)[1:]
967 accented_contents = match.group(2).strip()
968 match = re_accentedcontents.match(accented_contents)
969 accented_char = match.group(1)
970 converted = _convert_accent(accent, accented_char)
# Conversion failed: rebuild the canonical contents string for the
# warning and ERT fallback below.
973 contents = '%s{%s}' % (accent, accented_char),
# Successful conversion: splice the precomposed character in place.
975 document.body[i] = '%s%s' % (prefix, converted)
978 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
# Fallback: keep the raw accent command in an ERT inset.
979 document.body[i] = prefix
980 document.body[i+1:i+1] = ['\\begin_inset ERT',
983 '\\begin_layout %s' % document.default_layout,
# convert_ertbackslash writes the escaped command text and returns the
# (possibly advanced) index of the last written line.
987 i = convert_ertbackslash(document.body, i + 7,
989 document.default_layout)
990 document.body[i+1:i+1] = ['\\end_layout',
996 def revert_accent(document):
997 inverse_accent_map = {}
999 inverse_accent_map[accent_map[k]] = k
1000 inverse_special_accent_map = {}
1001 for k in special_accent_map:
1002 inverse_special_accent_map[special_accent_map[k]] = k
1003 inverse_accented_map = {}
1004 for k in accented_map:
1005 inverse_accented_map[accented_map[k]] = k
1007 # Since LyX may insert a line break within a word we must combine all
1008 # words before unicode normalization.
1009 # We do this only if the next line starts with an accent, otherwise we
1010 # would create things like '\begin_inset ERTstatus'.
1011 numberoflines = len(document.body)
1012 for i in range(numberoflines-1):
1013 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1015 if (document.body[i+1][0] in inverse_accent_map):
1016 # the last character of this line and the first of the next line
1017 # form probably a surrogate pair.
1018 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1019 document.body[i] += document.body[i+1][0]
1020 document.body[i+1] = document.body[i+1][1:]
1022 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1023 # This is needed to catch all accented characters.
1024 for i in range(numberoflines):
1025 # Unfortunately we have a mixture of unicode strings and plain strings,
1026 # because we never use u'xxx' for string literals, but 'xxx'.
1027 # Therefore we may have to try two times to normalize the data.
1029 document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1031 document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1033 # Replace accented characters with InsetLaTeXAccent
1034 # Do not convert characters that can be represented in the chosen
1036 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1037 lang_re = re.compile(r"^\\lang\s(\S+)")
1038 for i in range(len(document.body)):
1040 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1041 # Track the encoding of the current line
1042 result = lang_re.match(document.body[i])
1044 language = result.group(1)
1045 if language == "default":
1046 encoding_stack[-1] = document.encoding
1048 from lyx2lyx_lang import lang
1049 encoding_stack[-1] = lang[language][3]
1051 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1052 encoding_stack.append(encoding_stack[-1])
1054 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1055 del encoding_stack[-1]
1058 for j in range(len(document.body[i])):
1059 # dotless i and dotless j are both in special_accent_map and can
1060 # occur as an accented character, so we need to test that the
1061 # following character is no accent
1062 if (document.body[i][j] in inverse_special_accent_map and
1063 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1064 accent = document.body[i][j]
1066 dummy = accent.encode(encoding_stack[-1])
1067 except UnicodeEncodeError:
1068 # Insert the rest of the line as new line
1069 if j < len(document.body[i]) - 1:
1070 document.body[i+1:i+1] = document.body[i][j+1:]
1071 # Delete the accented character
1073 document.body[i] = document.body[i][:j-1]
1075 document.body[i] = u''
1076 # Finally add the InsetLaTeXAccent
1077 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1079 elif j > 0 and document.body[i][j] in inverse_accent_map:
1080 accented_char = document.body[i][j-1]
1081 if accented_char == ' ':
1082 # Conform to LyX output
1084 elif accented_char in inverse_accented_map:
1085 accented_char = inverse_accented_map[accented_char]
1086 accent = document.body[i][j]
1088 dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1089 except UnicodeEncodeError:
1090 # Insert the rest of the line as new line
1091 if j < len(document.body[i]) - 1:
1092 document.body[i+1:i+1] = document.body[i][j+1:]
1093 # Delete the accented characters
1095 document.body[i] = document.body[i][:j-2]
1097 document.body[i] = u''
1098 # Finally add the InsetLaTeXAccent
1099 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1101 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1102 for i in range(numberoflines):
1103 document.body[i] = unicodedata.normalize("NFKC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Every font property that existed in format 258 documents, mapped to
    # the value that means "property reset to default".  The "\color" and
    # "\bar" entries are required so that color and underbar changes are
    # normalized like the other properties.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """ Apply the whitespace normalization of format 259 to font language
    changes as well.  Format 259 moved leading/trailing whitespace outside
    of font-change sequences for most font properties, but \\lang was
    overlooked at the time; this step performs the identical transfer for
    language changes."""
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        # explicit language change at the start of the paragraph
        return words[1]
    else:
        return document.language
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""

    if document.backend != "latex":
        return

    lines = document.body

    # currently active non-default font properties
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                    i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = lines[i+1][1:]
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

        i = i + 1
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header line: fall back to automatic detection
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header line: fall back to automatic detection
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    r" Revert beamer's \alert inset back to ERT. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # look for the layout line inside the inset and wrap its content
        while True:
            if (document.body[i][:13] == "\\begin_layout"):
                # Insert the \alert command
                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def revert_beamer_structure(document):
    r" Revert beamer's \structure inset back to ERT. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # look for the layout line inside the inset and wrap its content
        while True:
            if (document.body[i][:13] == "\\begin_layout"):
                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header line: fall back to automatic detection
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Rename obsolete babel language names to their modern equivalents."""
    replacements = {"brazil": "brazilian",
                    "portuges": "portuguese"}
    new_name = replacements.get(document.language)
    if new_name is not None:
        document.language = new_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    """Rename modern babel language names back to their pre-263 spellings."""
    replacements = {"brazilian": "brazil",
                    "portuguese": "portuges"}
    old_name = replacements.get(document.language)
    if old_name is not None:
        document.language = old_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % document.language
1348 # \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """Rename the 'cv' text class to its new name 'simplecv'."""
    document.textclass = {"cv": "simplecv"}.get(document.textclass,
                                                document.textclass)
def revert_cv_textclass(document):
    """Rename the 'simplecv' text class back to its old name 'cv'."""
    document.textclass = {"simplecv": "cv"}.get(document.textclass,
                                                document.textclass)
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # delete the "|"
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
        i = i + 1
def revert_tableborder(document):
    # readd the "|" to the cell arguments, undoing convert_tableborder
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # add the "|"
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        i = i + 1
def revert_armenian(document):
    " Revert Armenian support: encoding, armtex preamble entry, language. "

    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        if i != -1:
            document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    i = 0
    k = -1
    while i < len(document.preamble):
        if k == -1:
            k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        if k == -1:
            k = document.preamble[i].find("%", 0, len(document.preamble[i]))
        i = i + 1
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        if k != -1:
            document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        else:
            document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header line: fall back to automatic detection
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    r" Revert preamble option \listings_params "
    i = find_token(document.header, "\\listings_params", 0)
    if i != -1:
        # move the parameters into a \lstset call in the preamble
        document.preamble.append('\\usepackage{listings}')
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
def revert_listings_inset(document):
    r'''Revert a listings inset to ERT (\lstinline or \begin/\end lstlisting).

    An inline inset such as

        \begin_inset listings
        lstparams "language=Delphi"
        inline true
        status open
        \begin_layout Standard
        var i = 10;
        \end_layout
        \end_inset

    becomes an ERT inset whose content is

        \backslash
        lstinline[language=Delphi]{var i = 10;}

    A non-inline inset becomes \begin{lstlisting}...\end{lstlisting} in ERT.
    There can be a caption inset in this inset, possibly containing a
    LatexCommand label; caption text and label are folded into the
    listings parameter list.
    '''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if not '\\usepackage{listings}' in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        # the first three lines of the inset carry the options
        for line in range(i + 1, i + 4):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
            if document.body[line].startswith('lstparams'):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                k = line + 1
        # caption?
        caption = ''
        label = ''
        cap = find_token(document.body, '\\begin_inset Caption', i)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?
            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                lbl = cap_end
                lbl_end = cap_end
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith('name '):
                    label = line.split()[1].strip('"')
                    break
            for line in document.body[cap : lbl] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith('\\'):
                    caption += line.strip()
            k = cap_end + 1
        # looking for the oneline code for lstinline
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body, '\\begin_layout Standard', i + 1) + 1) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            else:
                params += ',caption={%s}' % caption
        if len(label) > 0:
            if len(params) == 0:
                params = 'label={%s}' % label
            else:
                params += ',label={%s}' % label
        if len(params) > 0:
            params = '[%s]' % params
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout Standard',
                                      '',
                                      '\\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            document.body[i: j+1] = [r'\begin_inset ERT',
                                     'status %s' % status,
                                     '',
                                     r'\begin_layout Standard',
                                     '',
                                     '',
                                     '\\backslash',
                                     r'begin{lstlisting}%s' % params,
                                     r'\end_layout',
                                     '',
                                     r'\begin_layout Standard',
                                     ''
                                     ] + document.body[k : j - 1] + \
                                    ['',
                                     r'\begin_layout Standard',
                                     '',
                                     '\\backslash',
                                     'end{lstlisting}',
                                     r'\end_layout',
                                     '',
                                     r'\end_inset']
        i = i + 1
def revert_include_listings(document):
    r'''Revert the lstinputlisting Include option, translating

        \begin_inset Include \lstinputlisting{file}[opt]

    into an ERT inset containing

        \backslash
        lstinputlisting{file}[opt]
    '''
    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if not '\\usepackage{listings}' in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # find command line lstinputlisting{file}[options]
        cmd, file, option = '', '', ''
        if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
            cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)',
                                         document.body[i].split()[2]).groups()
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout Standard',
                                    '',
                                    '',
                                    '\\backslash',
                                    '%s%s{%s}' % (cmd, option, file),
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
        i = i + 1
def revert_ext_font_sizes(document):
    " Move the font size of ext* classes from \\paperfontsize to \\options. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # no options line yet: insert one right after \textclass
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    " Move the font size of ext* classes from \\options to \\paperfontsize. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    if i == -1: return

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            break
    else: # this else will only be attained if the for cycle had no match
        return

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # strip the trailing 'pt'
            fontsize = opt[:-2]
            break
    else:
        return

    del options[j]

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    if options:
        document.header[i] = '\\options %s' % ','.join(options)
    else:
        del document.header[i]
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note.  From

        \begin_layout --Separator--
        something
        \end_layout

    to

        \begin_layout Standard
        \begin_inset Note Note
        status open

        \begin_layout Standard
        Separate Environment
        \end_layout

        \end_inset
        something

        \end_layout
    '''
    i = 0
    while True:
        i = find_token(document.body, r'\begin_layout --Separator--', i)
        if i == -1:
            break
        j = find_end_of_layout(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        document.body[i : j + 1] = [r'\begin_layout Standard',
                                    r'\begin_inset Note Note',
                                    'status open',
                                    '',
                                    r'\begin_layout Standard',
                                    'Separate Environment',
                                    r'\end_layout',
                                    '',
                                    r'\end_inset'] + \
                                    document.body[ i + 1 : j] + \
                                   ['',
                                    r'\end_layout']
        i = i + 1
supported_versions = ["1.5.0","1.5"]

# Conversion chain: each entry is [target_format, [functions applied when
# converting up to that format]].  Steps that change the format number
# without touching the document use an empty list.
# NOTE(review): entries not visible in this chunk were reconstructed as
# no-op steps — verify against the released lyx_1_5.py.
convert = [[246, []],
           [247, [convert_font_settings]],
           [248, []],
           [249, [convert_utf8]],
           [250, []],
           [251, []],
           [252, [convert_commandparams, convert_bibitem]],
           [253, []],
           [254, [convert_esint]],
           [255, []],
           [256, []],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [260, []],
           [261, [convert_changes]],
           [262, []],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [266, []],
           [267, []],
           [268, []],
           [269, []],
           [270, []],
           [271, [convert_ext_font_sizes]],
           [272, []],
           [273, []],
           [274, [normalize_font_whitespace_274]]
          ]

# Reversion chain: each entry is [target_format, [functions applied when
# converting down to that format]].
revert =  [[273, []],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [260, []],
           [259, [revert_utf8x]],
           [258, []],
           [257, []],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [249, []],
           [248, [revert_accent, revert_utf8]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]
1820 if __name__ == "__main__":