lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24
  25 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
  26 from LyX import get_encoding
  27
  28
  29 ####################################################################
  30 # Private helper functions
  31
  32 def find_end_of_inset(lines, i):
  33     " Find end of inset, where lines[i] is included."
  34     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  35
  36 def find_end_of_layout(lines, i):
  37     " Find end of layout, where lines[i] is included."
  38     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  39
  40 # End of helper functions
  41 ####################################################################
  42
  43
  44 ##
  45 #  Notes: Framed/Shaded
  46 #
  47
  48 def revert_framed(document):
  49     "Revert framed notes. "
  50     i = 0
  51     while 1:
  52         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  53
  54         if i == -1:
  55             return
  56         document.body[i] = "\\begin_inset Note"
  57         i = i + 1
  58
  59
  60 ##
  61 #  Fonts
  62 #
  63
  64 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  65                     'times'   : 'times',   'palatino' : 'palatino',
  66                     'helvet'  : 'default', 'avant'    : 'default',
  67                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  68                     'pslatex' : 'times'}
  69 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  70                     'times'   : 'default', 'palatino' : 'default',
  71                     'helvet'  : 'helvet',  'avant'    : 'avant',
  72                     'newcent' : 'default', 'bookman'  : 'default',
  73                     'pslatex' : 'helvet'}
  74 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'default', 'avant'    : 'default',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'courier'}
  79
  80 def convert_font_settings(document):
  81     " Convert font settings. "
  82     i = 0
  83     i = find_token_exact(document.header, "\\fontscheme", i)
  84     if i == -1:
  85         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  86         return
  87     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  88     if font_scheme == '':
  89         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  90         font_scheme = 'default'
  91     if not font_scheme in roman_fonts.keys():
  92         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  93         font_scheme = 'default'
  94     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
  95                           '\\font_sans %s' % sans_fonts[font_scheme],
  96                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
  97                           '\\font_default_family default',
  98                           '\\font_sc false',
  99                           '\\font_osf false',
 100                           '\\font_sf_scale 100',
 101                           '\\font_tt_scale 100']
 102
 103
 104 def revert_font_settings(document):
 105     " Revert font settings. "
 106     i = 0
 107     insert_line = -1
 108     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 109     for family in 'roman', 'sans', 'typewriter':
 110         name = '\\font_%s' % family
 111         i = find_token_exact(document.header, name, i)
 112         if i == -1:
 113             document.warning("Malformed LyX document: Missing `%s'." % name)
 114             i = 0
 115         else:
 116             if (insert_line < 0):
 117                 insert_line = i
 118             fonts[family] = get_value(document.header, name, i, i + 1)
 119             del document.header[i]
 120     i = find_token_exact(document.header, '\\font_default_family', i)
 121     if i == -1:
 122         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 123         font_default_family = 'default'
 124     else:
 125         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 126         del document.header[i]
 127     i = find_token_exact(document.header, '\\font_sc', i)
 128     if i == -1:
 129         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 130         font_sc = 'false'
 131     else:
 132         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 133         del document.header[i]
 134     if font_sc != 'false':
 135         document.warning("Conversion of '\\font_sc' not yet implemented.")
 136     i = find_token_exact(document.header, '\\font_osf', i)
 137     if i == -1:
 138         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 139         font_osf = 'false'
 140     else:
 141         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 142         del document.header[i]
 143     i = find_token_exact(document.header, '\\font_sf_scale', i)
 144     if i == -1:
 145         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 146         font_sf_scale = '100'
 147     else:
 148         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 149         del document.header[i]
 150     if font_sf_scale != '100':
 151         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 152     i = find_token_exact(document.header, '\\font_tt_scale', i)
 153     if i == -1:
 154         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 155         font_tt_scale = '100'
 156     else:
 157         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 158         del document.header[i]
 159     if font_tt_scale != '100':
 160         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 161     for font_scheme in roman_fonts.keys():
 162         if (roman_fonts[font_scheme] == fonts['roman'] and
 163             sans_fonts[font_scheme] == fonts['sans'] and
 164             typewriter_fonts[font_scheme] == fonts['typewriter']):
 165             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 166             if font_default_family != 'default':
 167                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 168             if font_osf == 'true':
 169                 document.warning("Ignoring `\\font_osf = true'")
 170             return
 171     font_scheme = 'default'
 172     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 173     if fonts['roman'] == 'cmr':
 174         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 175         if font_osf == 'true':
 176             document.preamble.append('\\usepackage{eco}')
 177             font_osf = 'false'
 178     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 179         if fonts['roman'] == font:
 180             document.preamble.append('\\usepackage{%s}' % font)
 181     for font in 'cmss', 'lmss', 'cmbr':
 182         if fonts['sans'] == font:
 183             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 184     for font in 'berasans':
 185         if fonts['sans'] == font:
 186             document.preamble.append('\\usepackage{%s}' % font)
 187     for font in 'cmtt', 'lmtt', 'cmtl':
 188         if fonts['typewriter'] == font:
 189             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 190     for font in 'courier', 'beramono', 'luximono':
 191         if fonts['typewriter'] == font:
 192             document.preamble.append('\\usepackage{%s}' % font)
 193     if font_default_family != 'default':
 194         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 195     if font_osf == 'true':
 196         document.warning("Ignoring `\\font_osf = true'")
 197
 198
 199 def revert_booktabs(document):
 200     " We remove the booktabs flag or everything else will become a mess. "
 201     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 202     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 203     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 204     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 205     i = 0
 206     while 1:
 207         i = find_token(document.body, "\\begin_inset Tabular", i)
 208         if i == -1:
 209             return
 210         j = find_end_of_inset(document.body, i + 1)
 211         if j == -1:
 212             document.warning("Malformed LyX document: Could not find end of tabular.")
 213             continue
 214         for k in range(i, j):
 215             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 216                 document.warning("Converting 'booktabs' table to normal table.")
 217                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 218             if re.search(re_row, document.body[k]):
 219                 document.warning("Removing extra row space.")
 220                 document.body[k] = re_tspace.sub('', document.body[k])
 221                 document.body[k] = re_bspace.sub('', document.body[k])
 222                 document.body[k] = re_ispace.sub('', document.body[k])
 223         i = i + 1
 224
 225
 226 def convert_multiencoding(document, forward):
 227     """ Fix files with multiple encodings.
 228 Files with an inputencoding of "auto" or "default" and multiple languages
 229 where at least two languages have different default encodings are encoded
 230 in multiple encodings for file formats < 249. These files are incorrectly
 231 read and written (as if the whole file was in the encoding of the main
 232 language).
 233 This is not true for files written by CJK-LyX, they are always in the locale
 234 encoding.
 235
 236 This function
 237 - converts from fake unicode values to true unicode if forward is true, and
 238 - converts from true unicode values to fake unicode if forward is false.
 239 document.encoding must be set to the old value (format 248) in both cases.
 240
 241 We do this here and not in LyX.py because it is far easier to do the
 242 necessary parsing in modern formats than in ancient ones.
 243 """
 244     if document.cjk_encoding != '':
 245         return
 246     encoding_stack = [document.encoding]
 247     lang_re = re.compile(r"^\\lang\s(\S+)")
 248     if document.inputencoding == "auto" or document.inputencoding == "default":
 249         for i in range(len(document.body)):
 250             result = lang_re.match(document.body[i])
 251             if result:
 252                 language = result.group(1)
 253                 if language == "default":
 254                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 255                     encoding_stack[-1] = document.encoding
 256                 else:
 257                     from lyx2lyx_lang import lang
 258                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 259                     encoding_stack[-1] = lang[language][3]
 260             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 261                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 262                 encoding_stack.append(encoding_stack[-1])
 263             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 264                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 265                 if len(encoding_stack) == 1:
 266                     # Don't remove the document encoding from the stack
 267                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 268                 else:
 269                     del encoding_stack[-1]
 270             if encoding_stack[-1] != document.encoding:
 271                 if forward:
 272                     # This line has been incorrectly interpreted as if it was
 273                     # encoded in 'encoding'.
 274                     # Convert back to the 8bit string that was in the file.
 275                     orig = document.body[i].encode(document.encoding)
 276                     # Convert the 8bit string that was in the file to unicode
 277                     # with the correct encoding.
 278                     document.body[i] = orig.decode(encoding_stack[-1])
 279                 else:
 280                     # Convert unicode to the 8bit string that will be written
 281                     # to the file with the correct encoding.
 282                     orig = document.body[i].encode(encoding_stack[-1])
 283                     # Convert the 8bit string that will be written to the
 284                     # file to fake unicode with the encoding that will later
 285                     # be used when writing to the file.
 286                     document.body[i] = orig.decode(document.encoding)
 287
 288
 289 def convert_utf8(document):
 290     " Set document encoding to UTF-8. "
 291     convert_multiencoding(document, True)
 292     document.encoding = "utf8"
 293
 294
 295 def revert_utf8(document):
 296     " Set document encoding to the value corresponding to inputencoding. "
 297     i = find_token(document.header, "\\inputencoding", 0)
 298     if i == -1:
 299         document.header.append("\\inputencoding auto")
 300     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 301         document.header[i] = "\\inputencoding auto"
 302     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 303     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 304     convert_multiencoding(document, False)
 305
 306
 307 def revert_cs_label(document):
 308     " Remove status flag of charstyle label. "
 309     i = 0
 310     while 1:
 311         i = find_token(document.body, "\\begin_inset CharStyle", i)
 312         if i == -1:
 313             return
 314         # Seach for a line starting 'show_label'
 315         # If it is not there, break with a warning message
 316         i = i + 1
 317         while 1:
 318             if (document.body[i][:10] == "show_label"):
 319                 del document.body[i]
 320                 break
 321             elif (document.body[i][:13] == "\\begin_layout"):
 322                 document.warning("Malformed LyX document: Missing 'show_label'.")
 323                 break
 324             i = i + 1
 325
 326         i = i + 1
 327
 328
 329 def convert_bibitem(document):
 330     """ Convert
 331 \bibitem [option]{argument}
 332
 333 to
 334
 335 \begin_inset LatexCommand bibitem
 336 label "option"
 337 key "argument"
 338
 339 \end_inset
 340
 341 This must be called after convert_commandparams.
 342 """
 343     regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
 344     i = 0
 345     while 1:
 346         i = find_token(document.body, "\\bibitem", i)
 347         if i == -1:
 348             break
 349         match = re.match(regex, document.body[i])
 350         option = match.group(1)
 351         argument = match.group(2)
 352         lines = ['\\begin_inset LatexCommand bibitem']
 353         if option != None:
 354             lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
 355         lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
 356         lines.append('')
 357         lines.append('\\end_inset')
 358         document.body[i:i+1] = lines
 359         i = i + 1
 360
 361
 362 commandparams_info = {
 363     # command : [option1, option2, argument]
 364     "bibitem" : ["label", "", "key"],
 365     "bibtex" : ["options", "btprint", "bibfiles"],
 366     "cite"        : ["after", "before", "key"],
 367     "citet"       : ["after", "before", "key"],
 368     "citep"       : ["after", "before", "key"],
 369     "citealt"     : ["after", "before", "key"],
 370     "citealp"     : ["after", "before", "key"],
 371     "citeauthor"  : ["after", "before", "key"],
 372     "citeyear"    : ["after", "before", "key"],
 373     "citeyearpar" : ["after", "before", "key"],
 374     "citet*"      : ["after", "before", "key"],
 375     "citep*"      : ["after", "before", "key"],
 376     "citealt*"    : ["after", "before", "key"],
 377     "citealp*"    : ["after", "before", "key"],
 378     "citeauthor*" : ["after", "before", "key"],
 379     "Citet"       : ["after", "before", "key"],
 380     "Citep"       : ["after", "before", "key"],
 381     "Citealt"     : ["after", "before", "key"],
 382     "Citealp"     : ["after", "before", "key"],
 383     "Citeauthor"  : ["after", "before", "key"],
 384     "Citet*"      : ["after", "before", "key"],
 385     "Citep*"      : ["after", "before", "key"],
 386     "Citealt*"    : ["after", "before", "key"],
 387     "Citealp*"    : ["after", "before", "key"],
 388     "Citeauthor*" : ["after", "before", "key"],
 389     "citefield"   : ["after", "before", "key"],
 390     "citetitle"   : ["after", "before", "key"],
 391     "cite*"       : ["after", "before", "key"],
 392     "hfill" : ["", "", ""],
 393     "index"      : ["", "", "name"],
 394     "printindex" : ["", "", "name"],
 395     "label" : ["", "", "name"],
 396     "eqref"     : ["name", "", "reference"],
 397     "pageref"   : ["name", "", "reference"],
 398     "prettyref" : ["name", "", "reference"],
 399     "ref"       : ["name", "", "reference"],
 400     "vpageref"  : ["name", "", "reference"],
 401     "vref"      : ["name", "", "reference"],
 402     "tableofcontents" : ["", "", "type"],
 403     "htmlurl" : ["name", "", "target"],
 404     "url"     : ["name", "", "target"]}
 405
 406
 407 def convert_commandparams(document):
 408     """ Convert
 409
 410  \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 411  \end_inset
 412
 413  to
 414
 415  \begin_inset LatexCommand cmdname
 416  name1 "opt1"
 417  name2 "opt2"
 418  name3 "arg"
 419  \end_inset
 420
 421  name1, name2 and name3 can be different for each command.
 422 """
 423     # \begin_inset LatexCommand bibitem was not the official version (see
 424     # convert_bibitem()), but could be read in, so we convert it here, too.
 425
 426     i = 0
 427     while 1:
 428         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 429         if i == -1:
 430             break
 431         command = document.body[i][26:].strip()
 432         if command == "":
 433             document.warning("Malformed LyX document: Missing LatexCommand name.")
 434             i = i + 1
 435             continue
 436
 437         # The following parser is taken from the original InsetCommandParams::scanCommand
 438         name = ""
 439         option1 = ""
 440         option2 = ""
 441         argument = ""
 442         state = "WS"
 443         # Used to handle things like \command[foo[bar]]{foo{bar}}
 444         nestdepth = 0
 445         b = 0
 446         for c in command:
 447             if ((state == "CMDNAME" and c == ' ') or
 448                 (state == "CMDNAME" and c == '[') or
 449                 (state == "CMDNAME" and c == '{')):
 450                 state = "WS"
 451             if ((state == "OPTION" and c == ']') or
 452                 (state == "SECOPTION" and c == ']') or
 453                 (state == "CONTENT" and c == '}')):
 454                 if nestdepth == 0:
 455                     state = "WS"
 456                 else:
 457                     nestdepth = nestdepth - 1
 458             if ((state == "OPTION" and c == '[') or
 459                 (state == "SECOPTION" and c == '[') or
 460                 (state == "CONTENT" and c == '{')):
 461                 nestdepth = nestdepth + 1
 462             if state == "CMDNAME":
 463                     name += c
 464             elif state == "OPTION":
 465                     option1 += c
 466             elif state == "SECOPTION":
 467                     option2 += c
 468             elif state == "CONTENT":
 469                     argument += c
 470             elif state == "WS":
 471                 if c == '\\':
 472                     state = "CMDNAME"
 473                 elif c == '[' and b != ']':
 474                     state = "OPTION"
 475                     nestdepth = 0 # Just to be sure
 476                 elif c == '[' and b == ']':
 477                     state = "SECOPTION"
 478                     nestdepth = 0 # Just to be sure
 479                 elif c == '{':
 480                     state = "CONTENT"
 481                     nestdepth = 0 # Just to be sure
 482             b = c
 483
 484         # Now we have parsed the command, output the parameters
 485         lines = ["\\begin_inset LatexCommand %s" % name]
 486         if option1 != "":
 487             if commandparams_info[name][0] == "":
 488                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
 489             else:
 490                 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
 491         if option2 != "":
 492             if commandparams_info[name][1] == "":
 493                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
 494             else:
 495                 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
 496         if argument != "":
 497             if commandparams_info[name][2] == "":
 498                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
 499             else:
 500                 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
 501         document.body[i:i+1] = lines
 502         i = i + 1
 503
 504
 505 def revert_commandparams(document):
 506     regex = re.compile(r'(\S+)\s+(.+)')
 507     i = 0
 508     while 1:
 509         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 510         if i == -1:
 511             break
 512         name = document.body[i].split()[2]
 513         j = find_end_of_inset(document.body, i + 1)
 514         preview_line = ""
 515         option1 = ""
 516         option2 = ""
 517         argument = ""
 518         for k in range(i + 1, j):
 519             match = re.match(regex, document.body[k])
 520             if match:
 521                 pname = match.group(1)
 522                 pvalue = match.group(2)
 523                 if pname == "preview":
 524                     preview_line = document.body[k]
 525                 elif (commandparams_info[name][0] != "" and
 526                       pname == commandparams_info[name][0]):
 527                     option1 = pvalue.strip('"').replace('\\"', '"')
 528                 elif (commandparams_info[name][1] != "" and
 529                       pname == commandparams_info[name][1]):
 530                     option2 = pvalue.strip('"').replace('\\"', '"')
 531                 elif (commandparams_info[name][2] != "" and
 532                       pname == commandparams_info[name][2]):
 533                     argument = pvalue.strip('"').replace('\\"', '"')
 534             elif document.body[k].strip() != "":
 535                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 536         if name == "bibitem":
 537             if option1 == "":
 538                 lines = ["\\bibitem {%s}" % argument]
 539             else:
 540                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 541         else:
 542             if option1 == "":
 543                 if option2 == "":
 544                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 545                 else:
 546                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 547             else:
 548                 if option2 == "":
 549                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 550                 else:
 551                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 552         if name != "bibitem":
 553             if preview_line != "":
 554                 lines.append(preview_line)
 555             lines.append('')
 556             lines.append('\\end_inset')
 557         document.body[i:j+1] = lines
 558         i = j + 1
 559
 560
 561 def revert_nomenclature(document):
 562     " Convert nomenclature entry to ERT. "
 563     regex = re.compile(r'(\S+)\s+(.+)')
 564     i = 0
 565     use_nomencl = 0
 566     while 1:
 567         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 568         if i == -1:
 569             break
 570         use_nomencl = 1
 571         j = find_end_of_inset(document.body, i + 1)
 572         preview_line = ""
 573         symbol = ""
 574         description = ""
 575         prefix = ""
 576         for k in range(i + 1, j):
 577             match = re.match(regex, document.body[k])
 578             if match:
 579                 name = match.group(1)
 580                 value = match.group(2)
 581                 if name == "preview":
 582                     preview_line = document.body[k]
 583                 elif name == "symbol":
 584                     symbol = value.strip('"').replace('\\"', '"')
 585                 elif name == "description":
 586                     description = value.strip('"').replace('\\"', '"')
 587                 elif name == "prefix":
 588                     prefix = value.strip('"').replace('\\"', '"')
 589             elif document.body[k].strip() != "":
 590                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 591         if prefix == "":
 592             command = 'nomenclature{%s}{%s}' % (symbol, description)
 593         else:
 594             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 595         document.body[i:j+1] = ['\\begin_inset ERT',
 596                                 'status collapsed',
 597                                 '',
 598                                 '\\begin_layout %s' % document.default_layout,
 599                                 '',
 600                                 '',
 601                                 '\\backslash',
 602                                 command,
 603                                 '\\end_layout',
 604                                 '',
 605                                 '\\end_inset']
 606         i = i + 11
 607     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 608         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 609         document.preamble.append('\\makenomenclature')
 610
 611
 612 def revert_printnomenclature(document):
 613     " Convert printnomenclature to ERT. "
 614     regex = re.compile(r'(\S+)\s+(.+)')
 615     i = 0
 616     use_nomencl = 0
 617     while 1:
 618         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 619         if i == -1:
 620             break
 621         use_nomencl = 1
 622         j = find_end_of_inset(document.body, i + 1)
 623         preview_line = ""
 624         labelwidth = ""
 625         for k in range(i + 1, j):
 626             match = re.match(regex, document.body[k])
 627             if match:
 628                 name = match.group(1)
 629                 value = match.group(2)
 630                 if name == "preview":
 631                     preview_line = document.body[k]
 632                 elif name == "labelwidth":
 633                     labelwidth = value.strip('"').replace('\\"', '"')
 634             elif document.body[k].strip() != "":
 635                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 636         if labelwidth == "":
 637             command = 'nomenclature{}'
 638         else:
 639             command = 'nomenclature[%s]' % labelwidth
 640         document.body[i:j+1] = ['\\begin_inset ERT',
 641                                 'status collapsed',
 642                                 '',
 643                                 '\\begin_layout %s' % document.default_layout,
 644                                 '',
 645                                 '',
 646                                 '\\backslash',
 647                                 command,
 648                                 '\\end_layout',
 649                                 '',
 650                                 '\\end_inset']
 651         i = i + 11
 652     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 653         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 654         document.preamble.append('\\makenomenclature')
 655
 656
 657 def convert_esint(document):
 658     " Add \\use_esint setting to header. "
 659     i = find_token(document.header, "\\cite_engine", 0)
 660     if i == -1:
 661         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 662         return
 663     # 0 is off, 1 is auto, 2 is on.
 664     document.header.insert(i, '\\use_esint 0')
 665
 666
 667 def revert_esint(document):
 668     " Remove \\use_esint setting from header. "
 669     i = find_token(document.header, "\\use_esint", 0)
 670     if i == -1:
 671         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 672         return
 673     use_esint = document.header[i].split()[1]
 674     del document.header[i]
 675     # 0 is off, 1 is auto, 2 is on.
 676     if (use_esint == 2):
 677         document.preamble.append('\\usepackage{esint}')
 678
 679
 680 def revert_clearpage(document):
 681     " clearpage -> ERT "
 682     i = 0
 683     while 1:
 684         i = find_token(document.body, "\\clearpage", i)
 685         if i == -1:
 686             break
 687         document.body[i:i+1] =  ['\\begin_inset ERT',
 688                                 'status collapsed',
 689                                 '',
 690                                 '\\begin_layout %s' % document.default_layout,
 691                                 '',
 692                                 '',
 693                                 '\\backslash',
 694                                 'clearpage',
 695                                 '\\end_layout',
 696                                 '',
 697                                 '\\end_inset']
 698     i = i + 1
 699
 700
 701 def revert_cleardoublepage(document):
 702     " cleardoublepage -> ERT "
 703     i = 0
 704     while 1:
 705         i = find_token(document.body, "\\cleardoublepage", i)
 706         if i == -1:
 707             break
 708         document.body[i:i+1] =  ['\\begin_inset ERT',
 709                                 'status collapsed',
 710                                 '',
 711                                 '\\begin_layout %s' % document.default_layout,
 712                                 '',
 713                                 '',
 714                                 '\\backslash',
 715                                 'cleardoublepage',
 716                                 '\\end_layout',
 717                                 '',
 718                                 '\\end_inset']
 719     i = i + 1
 720
 721
 722 def convert_lyxline(document):
 723     " remove fontsize commands for \lyxline "
 724     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 725     # to change the line thickness. The new definiton does this so that imported
 726     # \lyxlines would have a different line thickness. The eventual fontsize command
 727     # before \lyxline is therefore removed to get the same output.
 728     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 729                  "large", "Large", "LARGE", "huge", "Huge"]
 730     for n in range(0, len(fontsizes)):
 731         i = 0
 732         k = 0
 733         while i < len(document.body):
 734             i = find_token(document.body, "\\size " + fontsizes[n], i)
 735             k = find_token(document.body, "\\lyxline", i)
 736             # the corresponding fontsize command is always 2 lines before the \lyxline
 737             if (i != -1 and k == i+2):
 738                 document.body[i:i+1] = []
 739             else:
 740                 break
 741         i = i + 1
 742
 743
 744 def revert_encodings(document):
 745     " Set new encodings to auto. "
 746     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 747                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 748                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 749     i = find_token(document.header, "\\inputencoding", 0)
 750     if i == -1:
 751         document.header.append("\\inputencoding auto")
 752     else:
 753         inputenc = get_value(document.header, "\\inputencoding", i)
 754         if inputenc in encodings:
 755             document.header[i] = "\\inputencoding auto"
 756     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 757
 758
 759 def convert_caption(document):
 760     " Convert caption layouts to caption insets. "
 761     i = 0
 762     while 1:
 763         i = find_token(document.body, "\\begin_layout Caption", i)
 764         if i == -1:
 765             return
 766         j = find_end_of_layout(document.body, i)
 767         if j == -1:
 768             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 769             return
 770
 771         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 772         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 773                             "\\begin_inset Caption", "",
 774                             "\\begin_layout %s" % document.default_layout]
 775         i = i + 1
 776
 777
 778 def revert_caption(document):
 779     " Convert caption insets to caption layouts. "
 780     " This assumes that the text class has a caption style. "
 781     i = 0
 782     while 1:
 783         i = find_token(document.body, "\\begin_inset Caption", i)
 784         if i == -1:
 785             return
 786
 787         # We either need to delete the previous \begin_layout line, or we
 788         # need to end the previous layout if this inset is not in the first
 789         # position of the paragraph.
 790         layout_before = find_token_backwards(document.body, "\\begin_layout", i)
 791         if layout_before == -1:
 792             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 793             return
 794         layout_line = document.body[layout_before]
 795         del_layout_before = True
 796         l = layout_before + 1
 797         while l < i:
 798             if document.body[l] != "":
 799                 del_layout_before = False
 800                 break
 801             l = l + 1
 802         if del_layout_before:
 803             del document.body[layout_before:i]
 804             i = layout_before
 805         else:
 806             document.body[i:i] = ["\\end_layout", ""]
 807             i = i + 2
 808
 809         # Find start of layout in the inset and end of inset
 810         j = find_token(document.body, "\\begin_layout", i)
 811         if j == -1:
 812             document.warning("Malformed LyX document: Missing `\\begin_layout'.")
 813             return
 814         k = find_end_of_inset(document.body, i)
 815         if k == -1:
 816             document.warning("Malformed LyX document: Missing `\\end_inset'.")
 817             return
 818
 819         # We either need to delete the following \end_layout line, or we need
 820         # to restart the old layout if this inset is not at the paragraph end.
 821         layout_after = find_token(document.body, "\\end_layout", k)
 822         if layout_after == -1:
 823             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 824             return
 825         del_layout_after = True
 826         l = k + 1
 827         while l < layout_after:
 828             if document.body[l] != "":
 829                 del_layout_after = False
 830                 break
 831             l = l + 1
 832         if del_layout_after:
 833             del document.body[k+1:layout_after+1]
 834         else:
 835             document.body[k+1:k+1] = [layout_line, ""]
 836
 837         # delete \begin_layout and \end_inset and replace \begin_inset with
 838         # "\begin_layout Caption". This works because we can only have one
 839         # paragraph in the caption inset: The old \end_layout will be recycled.
 840         del document.body[k]
 841         if document.body[k] == "":
 842             del document.body[k]
 843         del document.body[j]
 844         if document.body[j] == "":
 845             del document.body[j]
 846         document.body[i] = "\\begin_layout Caption"
 847         if document.body[i+1] == "":
 848             del document.body[i+1]
 849         i = i + 1
 850
 851
 852 # Accents of InsetLaTeXAccent
 853 accent_map = {
 854     "`" : u'\u0300', # grave
 855     "'" : u'\u0301', # acute
 856     "^" : u'\u0302', # circumflex
 857     "~" : u'\u0303', # tilde
 858     "=" : u'\u0304', # macron
 859     "u" : u'\u0306', # breve
 860     "." : u'\u0307', # dot above
 861     "\"": u'\u0308', # diaresis
 862     "r" : u'\u030a', # ring above
 863     "H" : u'\u030b', # double acute
 864     "v" : u'\u030c', # caron
 865     "b" : u'\u0320', # minus sign below
 866     "d" : u'\u0323', # dot below
 867     "c" : u'\u0327', # cedilla
 868     "k" : u'\u0328', # ogonek
 869     "t" : u'\u0361'  # tie. This is special: It spans two characters, but
 870                      # only one is given as argument, so we don't need to
 871                      # treat it differently.
 872 }
 873
 874
 875 # special accents of InsetLaTeXAccent without argument
 876 special_accent_map = {
 877     'i' : u'\u0131', # dotless i
 878     'j' : u'\u0237', # dotless j
 879     'l' : u'\u0142', # l with stroke
 880     'L' : u'\u0141'  # L with stroke
 881 }
 882
 883
 884 # special accent arguments of InsetLaTeXAccent
 885 accented_map = {
 886     '\\i' : u'\u0131', # dotless i
 887     '\\j' : u'\u0237'  # dotless j
 888 }
 889
 890
 891 def _convert_accent(accent, accented_char):
 892     type = accent
 893     char = accented_char
 894     if char == '':
 895         if type in special_accent_map:
 896             return special_accent_map[type]
 897         # a missing char is treated as space by LyX
 898         char = ' '
 899     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
 900         # Special caron, only used with t, d, l and L.
 901         # It is not in the map because we convert it to the same unicode
 902         # character as the normal caron: \q{} is only defined if babel with
 903         # the czech or slovak language is used, and the normal caron
 904         # produces the correct output if the T1 font encoding is used.
 905         # For the same reason we never convert to \q{} in the other direction.
 906         type = 'v'
 907     elif char in accented_map:
 908         char = accented_map[char]
 909     elif (len(char) > 1):
 910         # We can only convert accents on a single char
 911         return ''
 912     a = accent_map.get(type)
 913     if a:
 914         return unicodedata.normalize("NFKC", "%s%s" % (char, a))
 915     return ''
 916
 917
 918 def convert_ertbackslash(body, i, ert, default_layout):
 919     r""" -------------------------------------------------------------------------------------------
 920     Convert backslashes and '\n' into valid ERT code, append the converted
 921     text to body[i] and return the (maybe incremented) line index i"""
 922
 923     for c in ert:
 924         if c == '\\':
 925             body[i] = body[i] + '\\backslash '
 926             i = i + 1
 927             body.insert(i, '')
 928         elif c == '\n':
 929             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
 930             i = i + 4
 931         else:
 932             body[i] = body[i] + c
 933     return i
 934
 935
 936 def convert_accent(document):
 937     # The following forms are supported by LyX:
 938     # '\i \"{a}' (standard form, as written by LyX)
 939     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
 940     # '\i \"{ }' (also accepted if the accented char is a space)
 941     # '\i \" a'  (also accepted)
 942     # '\i \"'    (also accepted)
 943     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
 944     re_contents = re.compile(r'^([^\s{]+)(.*)$')
 945     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
 946     i = 0
 947     while 1:
 948         i = find_re(document.body, re_wholeinset, i)
 949         if i == -1:
 950             return
 951         match = re_wholeinset.match(document.body[i])
 952         prefix = match.group(1)
 953         contents = match.group(3).strip()
 954         match = re_contents.match(contents)
 955         if match:
 956             # Strip first char (always \)
 957             accent = match.group(1)[1:]
 958             accented_contents = match.group(2).strip()
 959             match = re_accentedcontents.match(accented_contents)
 960             accented_char = match.group(1)
 961             converted = _convert_accent(accent, accented_char)
 962             if converted == '':
 963                 # Normalize contents
 964                 contents = '%s{%s}' % (accent, accented_char),
 965             else:
 966                 document.body[i] = '%s%s' % (prefix, converted)
 967                 i += 1
 968                 continue
 969         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
 970         document.body[i] = prefix
 971         document.body[i+1:i+1] = ['\\begin_inset ERT',
 972                                   'status collapsed',
 973                                   '',
 974                                   '\\begin_layout %s' % document.default_layout,
 975                                   '',
 976                                   '',
 977                                   '']
 978         i = convert_ertbackslash(document.body, i + 7,
 979                                  '\\%s' % contents,
 980                                  document.default_layout)
 981         document.body[i+1:i+1] = ['\\end_layout',
 982                                   '',
 983                                   '\\end_inset']
 984         i += 3
 985
 986
 987 def revert_accent(document):
 988     inverse_accent_map = {}
 989     for k in accent_map:
 990         inverse_accent_map[accent_map[k]] = k
 991     inverse_special_accent_map = {}
 992     for k in special_accent_map:
 993         inverse_special_accent_map[special_accent_map[k]] = k
 994     inverse_accented_map = {}
 995     for k in accented_map:
 996         inverse_accented_map[accented_map[k]] = k
 997
 998     # Since LyX may insert a line break within a word we must combine all
 999     # words before unicode normalization.
1000     # We do this only if the next line starts with an accent, otherwise we
1001     # would create things like '\begin_inset ERTstatus'.
1002     numberoflines = len(document.body)
1003     for i in range(numberoflines-1):
1004         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1005             continue
1006         if (document.body[i+1][0] in inverse_accent_map):
1007             # the last character of this line and the first of the next line
1008             # form probably a surrogate pair.
1009             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1010                 document.body[i] += document.body[i+1][0]
1011                 document.body[i+1] = document.body[i+1][1:]
1012
1013     # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1014     # This is needed to catch all accented characters.
1015     for i in range(numberoflines):
1016         # Unfortunately we have a mixture of unicode strings and plain strings,
1017         # because we never use u'xxx' for string literals, but 'xxx'.
1018         # Therefore we may have to try two times to normalize the data.
1019         try:
1020             document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1021         except TypeError:
1022             document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1023
1024     # Replace accented characters with InsetLaTeXAccent
1025     # Do not convert characters that can be represented in the chosen
1026     # encoding.
1027     encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1028     lang_re = re.compile(r"^\\lang\s(\S+)")
1029     for i in range(len(document.body)):
1030
1031         if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1032             # Track the encoding of the current line
1033             result = lang_re.match(document.body[i])
1034             if result:
1035                 language = result.group(1)
1036                 if language == "default":
1037                     encoding_stack[-1] = document.encoding
1038                 else:
1039                     from lyx2lyx_lang import lang
1040                     encoding_stack[-1] = lang[language][3]
1041                 continue
1042             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1043                 encoding_stack.append(encoding_stack[-1])
1044                 continue
1045             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1046                 del encoding_stack[-1]
1047                 continue
1048
1049         for j in range(len(document.body[i])):
1050             # dotless i and dotless j are both in special_accent_map and can
1051             # occur as an accented character, so we need to test that the
1052             # following character is no accent
1053             if (document.body[i][j] in inverse_special_accent_map and
1054                 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1055                 accent = document.body[i][j]
1056                 try:
1057                     dummy = accent.encode(encoding_stack[-1])
1058                 except UnicodeEncodeError:
1059                     # Insert the rest of the line as new line
1060                     if j < len(document.body[i]) - 1:
1061                         document.body[i+1:i+1] = document.body[i][j+1:]
1062                     # Delete the accented character
1063                     if j > 0:
1064                         document.body[i] = document.body[i][:j-1]
1065                     else:
1066                         document.body[i] = u''
1067                     # Finally add the InsetLaTeXAccent
1068                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1069                     break
1070             elif j > 0 and document.body[i][j] in inverse_accent_map:
1071                 accented_char = document.body[i][j-1]
1072                 if accented_char == ' ':
1073                     # Conform to LyX output
1074                     accented_char = ''
1075                 elif accented_char in inverse_accented_map:
1076                     accented_char = inverse_accented_map[accented_char]
1077                 accent = document.body[i][j]
1078                 try:
1079                     dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1080                 except UnicodeEncodeError:
1081                     # Insert the rest of the line as new line
1082                     if j < len(document.body[i]) - 1:
1083                         document.body[i+1:i+1] = document.body[i][j+1:]
1084                     # Delete the accented characters
1085                     if j > 1:
1086                         document.body[i] = document.body[i][:j-2]
1087                     else:
1088                         document.body[i] = u''
1089                     # Finally add the InsetLaTeXAccent
1090                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1091                     break
1092     # Normalize to "Normal form C" (NFC, pre-composed characters) again
1093     for i in range(numberoflines):
1094         document.body[i] = unicodedata.normalize("NFKC", document.body[i])
1095
1096
1097 def normalize_font_whitespace(document):
1098     """ Before format 259 the font changes were ignored if a
1099     whitespace was the first or last character in the sequence, this function
1100     transfers the whitespace outside."""
1101
1102     if document.backend != "latex":
1103         return
1104
1105     lines = document.body
1106
1107     char_properties = {"\\series": "default",
1108                        "\\emph": "default",
1109                        "\\color": "none",
1110                        "\\shape": "default",
1111                        "\\bar": "default",
1112                        "\\family": "default"}
1113     changes = {}
1114
1115     i = 0
1116     while i < len(lines):
1117         words = lines[i].split()
1118
1119         if len(words) > 0 and words[0] == "\\begin_layout":
1120             # a new paragraph resets all font changes
1121             changes.clear()
1122
1123         elif len(words) > 1 and words[0] in char_properties.keys():
1124             # we have a font change
1125             if char_properties[words[0]] == words[1]:
1126                 # property gets reset
1127                 if words[0] in changes.keys():
1128                     del changes[words[0]]
1129                 defaultproperty = True
1130             else:
1131                 # property gets set
1132                 changes[words[0]] = words[1]
1133                 defaultproperty = False
1134
1135             # We need to explicitly reset all changed properties if we find
1136             # a space below, because LyX 1.4 would output the space after
1137             # closing the previous change and before starting the new one,
1138             # and closing a font change means to close all properties, not
1139             # just the changed one.
1140
1141             if lines[i-1] and lines[i-1][-1] == " ":
1142                 lines[i-1] = lines[i-1][:-1]
1143                 # a space before the font change
1144                 added_lines = [" "]
1145                 for k in changes.keys():
1146                     # exclude property k because that is already in lines[i]
1147                     if k != words[0]:
1148                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1149                 for k in changes.keys():
1150                     # exclude property k because that must be added below anyway
1151                     if k != words[0]:
1152                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1153                 if defaultproperty:
1154                     # Property is reset in lines[i], so add the new stuff afterwards
1155                     lines[i+1:i+1] = added_lines
1156                 else:
1157                     # Reset property for the space
1158                     added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1159                     lines[i:i] = added_lines
1160                 i = i + len(added_lines)
1161
1162             elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1163                 # a space after the font change
1164                 if (lines[i+1] == " " and lines[i+2]):
1165                     next_words = lines[i+2].split()
1166                     if len(next_words) > 0 and next_words[0] == words[0]:
1167                         # a single blank with a property different from the
1168                         # previous and the next line must not be changed
1169                         i = i + 2
1170                         continue
1171                 lines[i+1] = lines[i+1][1:]
1172                 added_lines = [" "]
1173                 for k in changes.keys():
1174                     # exclude property k because that is already in lines[i]
1175                     if k != words[0]:
1176                         added_lines[1:1] = ["%s %s" % (k, changes[k])]
1177                 for k in changes.keys():
1178                     # exclude property k because that must be added below anyway
1179                     if k != words[0]:
1180                         added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1181                 # Reset property for the space
1182                 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1183                 lines[i:i] = added_lines
1184                 i = i + len(added_lines)
1185
1186         i = i + 1
1187
1188
1189 def revert_utf8x(document):
1190     " Set utf8x encoding to utf8. "
1191     i = find_token(document.header, "\\inputencoding", 0)
1192     if i == -1:
1193         document.header.append("\\inputencoding auto")
1194     else:
1195         inputenc = get_value(document.header, "\\inputencoding", i)
1196         if inputenc == "utf8x":
1197             document.header[i] = "\\inputencoding utf8"
1198     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1199
1200
1201 def revert_utf8plain(document):
1202     " Set utf8plain encoding to utf8. "
1203     i = find_token(document.header, "\\inputencoding", 0)
1204     if i == -1:
1205         document.header.append("\\inputencoding auto")
1206     else:
1207         inputenc = get_value(document.header, "\\inputencoding", i)
1208         if inputenc == "utf8-plain":
1209             document.header[i] = "\\inputencoding utf8"
1210     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1211
1212
1213 def revert_beamer_alert(document):
1214     " Revert beamer's \\alert inset back to ERT. "
1215     i = 0
1216     while 1:
1217         i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1218         if i == -1:
1219             return
1220         document.body[i] = "\\begin_inset ERT"
1221         i = i + 1
1222         while 1:
1223             if (document.body[i][:13] == "\\begin_layout"):
1224                 # Insert the \alert command
1225                 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
1226                 break
1227             i = i + 1
1228
1229         i = i + 1
1230
1231
1232 def revert_beamer_structure(document):
1233     " Revert beamer's \\structure inset back to ERT. "
1234     i = 0
1235     while 1:
1236         i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1237         if i == -1:
1238             return
1239         document.body[i] = "\\begin_inset ERT"
1240         i = i + 1
1241         while 1:
1242             if (document.body[i][:13] == "\\begin_layout"):
1243                 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
1244                 break
1245             i = i + 1
1246
1247         i = i + 1
1248
1249
1250 def convert_changes(document):
1251     " Switch output_changes off if tracking_changes is off. "
1252     i = find_token(document.header, '\\tracking_changes', 0)
1253     if i == -1:
1254         document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
1255         return
1256     j = find_token(document.header, '\\output_changes', 0)
1257     if j == -1:
1258         document.warning("Malformed lyx document: Missing '\\output_changes'.")
1259         return
1260     tracking_changes = get_value(document.header, "\\tracking_changes", i)
1261     output_changes = get_value(document.header, "\\output_changes", j)
1262     if tracking_changes == "false" and output_changes == "true":
1263         document.header[j] = "\\output_changes false"
1264
1265
1266 def revert_ascii(document):
1267     " Set ascii encoding to auto. "
1268     i = find_token(document.header, "\\inputencoding", 0)
1269     if i == -1:
1270         document.header.append("\\inputencoding auto")
1271     else:
1272         inputenc = get_value(document.header, "\\inputencoding", i)
1273         if inputenc == "ascii":
1274             document.header[i] = "\\inputencoding auto"
1275     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1276
1277
1278 def normalize_language_name(document):
1279     lang = { "brazil": "brazilian",
1280              "portuges": "portuguese"}
1281
1282     if document.language in lang:
1283         document.language = lang[document.language]
1284         i = find_token(document.header, "\\language", 0)
1285         document.header[i] = "\\language %s" % document.language
1286
1287
1288 def revert_language_name(document):
1289     lang = { "brazilian": "brazil",
1290              "portuguese": "portuges"}
1291
1292     if document.language in lang:
1293         document.language = lang[document.language]
1294         i = find_token(document.header, "\\language", 0)
1295         document.header[i] = "\\language %s" % document.language
1296
1297 #
1298 #  \textclass cv -> \textclass simplecv
1299 def convert_cv_textclass(document):
1300     if document.textclass == "cv":
1301         document.textclass = "simplecv"
1302
1303
1304 def revert_cv_textclass(document):
1305     if document.textclass == "simplecv":
1306         document.textclass = "cv"
1307
1308
1309 def convert_tableborder(document):
1310     # The problematic is: LyX double the table cell border as it ignores the "|" character in
1311     # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1312     i = 0
1313     while i < len(document.body):
1314         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1315         k = document.body[i].find("|>{", 0, len(document.body[i]))
1316         # the two tokens have to be in one line
1317         if (h != -1 and k != -1):
1318             # delete the "|"
1319             document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
1320         i = i + 1
1321
1322
1323 def revert_tableborder(document):
1324     i = 0
1325     while i < len(document.body):
1326         h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1327         k = document.body[i].find(">{", 0, len(document.body[i]))
1328         # the two tokens have to be in one line
1329         if (h != -1 and k != -1):
1330             # add the "|"
1331             document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
1332         i = i + 1
1333
1334
1335 def revert_armenian(document):
1336
1337     # set inputencoding from armscii8 to auto
1338     if document.inputencoding == "armscii8":
1339         i = find_token(document.header, "\\inputencoding", 0)
1340         if i != -1:
1341             document.header[i] = "\\inputencoding auto"
1342     # check if preamble exists, if not k is set to -1
1343     i = 0
1344     k = -1
1345     while i < len(document.preamble):
1346         if k == -1:
1347             k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1348         if k == -1:
1349             k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1350         i = i + 1
1351     # add the entry \usepackage{armtex} to the document preamble
1352     if document.language == "armenian":
1353         # set the armtex entry as the first preamble line
1354         if k != -1:
1355             document.preamble[0:0] = ["\\usepackage{armtex}"]
1356         # create the preamble when it doesn't exist
1357         else:
1358             document.preamble.append('\\usepackage{armtex}')
1359     # Set document language from armenian to english
1360     if document.language == "armenian":
1361         document.language = "english"
1362         i = find_token(document.header, "\\language", 0)
1363         if i != -1:
1364             document.header[i] = "\\language english"
1365
1366
1367 def revert_CJK(document):
1368     " Set CJK encodings to default and languages chinese, japanese and korean to english. "
1369     encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
1370                  "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
1371     i = find_token(document.header, "\\inputencoding", 0)
1372     if i == -1:
1373         document.header.append("\\inputencoding auto")
1374     else:
1375         inputenc = get_value(document.header, "\\inputencoding", i)
1376         if inputenc in encodings:
1377             document.header[i] = "\\inputencoding default"
1378     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1379
1380     if document.language == "chinese-simplified" or \
1381        document.language == "chinese-traditional" or \
1382        document.language == "japanese" or document.language == "korean":
1383         document.language = "english"
1384         i = find_token(document.header, "\\language", 0)
1385         if i != -1:
1386             document.header[i] = "\\language english"
1387
1388
1389 def revert_preamble_listings_params(document):
1390     " Revert preamble option \listings_params "
1391     i = find_token(document.header, "\\listings_params", 0)
1392     if i != -1:
1393         document.preamble.append('\\usepackage{listings}')
1394         document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
1395         document.header.pop(i);
1396
1397
1398 def revert_listings_inset(document):
1399     r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1400 FROM
1401
1402 \begin_inset
1403 lstparams "language=Delphi"
1404 inline true
1405 status open
1406
1407 \begin_layout Standard
1408 var i = 10;
1409 \end_layout
1410
1411 \end_inset
1412
1413 TO
1414
1415 \begin_inset ERT
1416 status open
1417 \begin_layout Standard
1418
1419
1420 \backslash
1421 lstinline[language=Delphi]{var i = 10;}
1422 \end_layout
1423
1424 \end_inset
1425 '''
1426     i = 0
1427     while True:
1428         i = find_token(document.body, '\\begin_inset listings', i)
1429         if i == -1:
1430             break
1431         else:
1432             if not '\\usepackage{listings}' in document.preamble:
1433                 document.preamble.append('\\usepackage{listings}')
1434         j = find_end_of_inset(document.body, i + 1)
1435         if j == -1:
1436             # this should not happen
1437             break
1438         inline = 'false'
1439         params = ''
1440         status = 'open'
1441         inlinecode = ''
1442         # first three lines
1443         for line in range(i + 1, i + 4):
1444             if document.body[line].startswith('inline'):
1445                 inline = document.body[line].split()[1]
1446             if document.body[line].startswith('lstparams'):
1447                 params = document.body[line].split()[1].strip('"')
1448             if document.body[line].startswith('status'):
1449                 status = document.body[line].split()[1].strip()
1450                 k = line + 1
1451         # looking for the oneline code for lstinline
1452         for line in range(i + 2, j + 1):
1453             if document.body[line].startswith(r'\end_layout'):
1454                 inlinecode = document.body[line - 1]
1455                 break
1456         if len(params) > 0:
1457             params = '[%s]' % params
1458         if inline == 'true':
1459             document.body[i:(j+1)] = [r'\begin_inset ERT',
1460                                       'status %s' % status,
1461                                       r'\begin_layout Standard',
1462                                       '',
1463                                       '',
1464                                       r'\backslash',
1465                                       'lstinline%s{%s}' % (params, inlinecode),
1466                                       r'\end_layout',
1467                                       '',
1468                                       r'\end_inset']
1469         else:
1470             document.body[i: j+1] =  [r'\begin_inset ERT',
1471                                       'status %s' % status,
1472                                       '',
1473                                       r'\begin_layout Standard',
1474                                       '',
1475                                       '',
1476                                       r'\backslash',
1477                                       r'begin{lstlisting}%s' % params,
1478                                       r'\end_layout'
1479                                     ] + document.body[k : j - 1] + \
1480                                      ['',
1481                                       r'\begin_layout Standard',
1482                                       '',
1483                                       r'\backslash',
1484                                       'end{lstlisting}',
1485                                       r'\end_layout',
1486                                       '',
1487                                       r'\end_inset']
1488
1489
1490 def revert_include_listings(document):
1491     r''' Revert lstinputlisting Include option , translate
1492 \begin_inset Include \lstinputlisting{file}[opt]
1493 preview false
1494
1495 \end_inset
1496
1497 TO
1498
1499 \begin_inset ERT
1500 status open
1501
1502 \begin_layout Standard
1503
1504
1505 \backslash
1506 lstinputlisting{file}[opt]
1507 \end_layout
1508
1509 \end_inset
1510     '''
1511
1512     i = 0
1513     while True:
1514         i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1515         if i == -1:
1516             break
1517         else:
1518             if not '\\usepackage{listings}' in document.preamble:
1519                 document.preamble.append('\\usepackage{listings}')
1520         j = find_end_of_inset(document.body, i + 1)
1521         if j == -1:
1522             # this should not happen
1523             break
1524         # find command line
1525         cmd = document.body[i].split()[2]
1526         document.body[i : j + 1] = [r'\begin_inset ERT',
1527                                     'status open',
1528                                     '',
1529                                     r'\begin_layout Standard',
1530                                     '',
1531                                     '',
1532                                     r'\backslash',
1533                                     '%s' % cmd[1:],
1534                                     r'\end_layout',
1535                                     '',
1536                                     r'\end_inset']
1537
1538
1539 ##
1540 # Conversion hub
1541 #
1542
1543 supported_versions = ["1.5.0","1.5"]
1544 convert = [[246, []],
1545            [247, [convert_font_settings]],
1546            [248, []],
1547            [249, [convert_utf8]],
1548            [250, []],
1549            [251, []],
1550            [252, [convert_commandparams, convert_bibitem]],
1551            [253, []],
1552            [254, [convert_esint]],
1553            [255, []],
1554            [256, []],
1555            [257, [convert_caption]],
1556            [258, [convert_lyxline]],
1557            [259, [convert_accent, normalize_font_whitespace]],
1558            [260, []],
1559            [261, [convert_changes]],
1560            [262, []],
1561            [263, [normalize_language_name]],
1562            [264, [convert_cv_textclass]],
1563            [265, [convert_tableborder]],
1564            [266, []],
1565            [267, []],
1566            [268, []],
1567            [269, []]]
1568
1569 revert =  [[269, [revert_beamer_alert, revert_beamer_structure]],
1570            [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
1571            [267, [revert_CJK]],
1572            [266, [revert_utf8plain]],
1573            [265, [revert_armenian]],
1574            [264, [revert_tableborder]],
1575            [263, [revert_cv_textclass]],
1576            [262, [revert_language_name]],
1577            [261, [revert_ascii]],
1578            [260, []],
1579            [259, [revert_utf8x]],
1580            [258, []],
1581            [257, []],
1582            [256, [revert_caption]],
1583            [255, [revert_encodings]],
1584            [254, [revert_clearpage, revert_cleardoublepage]],
1585            [253, [revert_esint]],
1586            [252, [revert_nomenclature, revert_printnomenclature]],
1587            [251, [revert_commandparams]],
1588            [250, [revert_cs_label]],
1589            [249, []],
1590            [248, [revert_accent, revert_utf8]],
1591            [247, [revert_booktabs]],
1592            [246, [revert_font_settings]],
1593            [245, [revert_framed]]]
1594
1595
1596 if __name__ == "__main__":
1597     pass
1598
1599