lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 1.5"""
  21
  22 import re
  23 import unicodedata
  24
  25 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
  26 from LyX import get_encoding
  27
  28
  29 ####################################################################
  30 # Private helper functions
  31
  32 def find_end_of_inset(lines, i):
  33     " Find end of inset, where lines[i] is included."
  34     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  35
  36 def find_end_of_layout(lines, i):
  37     " Find end of layout, where lines[i] is included."
  38     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  39
  40 # End of helper functions
  41 ####################################################################
  42
  43
  44 ##
  45 #  Notes: Framed/Shaded
  46 #
  47
  48 def revert_framed(document):
  49     "Revert framed notes. "
  50     i = 0
  51     while 1:
  52         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  53
  54         if i == -1:
  55             return
  56         document.body[i] = "\\begin_inset Note"
  57         i = i + 1
  58
  59
  60 ##
  61 #  Fonts
  62 #
  63
  64 roman_fonts      = {'default' : 'default', 'ae'       : 'ae',
  65                     'times'   : 'times',   'palatino' : 'palatino',
  66                     'helvet'  : 'default', 'avant'    : 'default',
  67                     'newcent' : 'newcent', 'bookman'  : 'bookman',
  68                     'pslatex' : 'times'}
  69 sans_fonts       = {'default' : 'default', 'ae'       : 'default',
  70                     'times'   : 'default', 'palatino' : 'default',
  71                     'helvet'  : 'helvet',  'avant'    : 'avant',
  72                     'newcent' : 'default', 'bookman'  : 'default',
  73                     'pslatex' : 'helvet'}
  74 typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
  75                     'times'   : 'default', 'palatino' : 'default',
  76                     'helvet'  : 'default', 'avant'    : 'default',
  77                     'newcent' : 'default', 'bookman'  : 'default',
  78                     'pslatex' : 'courier'}
  79
  80 def convert_font_settings(document):
  81     " Convert font settings. "
  82     i = 0
  83     i = find_token_exact(document.header, "\\fontscheme", i)
  84     if i == -1:
  85         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
  86         return
  87     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
  88     if font_scheme == '':
  89         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
  90         font_scheme = 'default'
  91     if not font_scheme in roman_fonts.keys():
  92         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
  93         font_scheme = 'default'
  94     document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
  95                           '\\font_sans %s' % sans_fonts[font_scheme],
  96                           '\\font_typewriter %s' % typewriter_fonts[font_scheme],
  97                           '\\font_default_family default',
  98                           '\\font_sc false',
  99                           '\\font_osf false',
 100                           '\\font_sf_scale 100',
 101                           '\\font_tt_scale 100']
 102
 103
 104 def revert_font_settings(document):
 105     " Revert font settings. "
 106     i = 0
 107     insert_line = -1
 108     fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
 109     for family in 'roman', 'sans', 'typewriter':
 110         name = '\\font_%s' % family
 111         i = find_token_exact(document.header, name, i)
 112         if i == -1:
 113             document.warning("Malformed LyX document: Missing `%s'." % name)
 114             i = 0
 115         else:
 116             if (insert_line < 0):
 117                 insert_line = i
 118             fonts[family] = get_value(document.header, name, i, i + 1)
 119             del document.header[i]
 120     i = find_token_exact(document.header, '\\font_default_family', i)
 121     if i == -1:
 122         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 123         font_default_family = 'default'
 124     else:
 125         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 126         del document.header[i]
 127     i = find_token_exact(document.header, '\\font_sc', i)
 128     if i == -1:
 129         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 130         font_sc = 'false'
 131     else:
 132         font_sc = get_value(document.header, '\\font_sc', i, i + 1)
 133         del document.header[i]
 134     if font_sc != 'false':
 135         document.warning("Conversion of '\\font_sc' not yet implemented.")
 136     i = find_token_exact(document.header, '\\font_osf', i)
 137     if i == -1:
 138         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 139         font_osf = 'false'
 140     else:
 141         font_osf = get_value(document.header, '\\font_osf', i, i + 1)
 142         del document.header[i]
 143     i = find_token_exact(document.header, '\\font_sf_scale', i)
 144     if i == -1:
 145         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 146         font_sf_scale = '100'
 147     else:
 148         font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
 149         del document.header[i]
 150     if font_sf_scale != '100':
 151         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 152     i = find_token_exact(document.header, '\\font_tt_scale', i)
 153     if i == -1:
 154         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 155         font_tt_scale = '100'
 156     else:
 157         font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
 158         del document.header[i]
 159     if font_tt_scale != '100':
 160         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 161     for font_scheme in roman_fonts.keys():
 162         if (roman_fonts[font_scheme] == fonts['roman'] and
 163             sans_fonts[font_scheme] == fonts['sans'] and
 164             typewriter_fonts[font_scheme] == fonts['typewriter']):
 165             document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 166             if font_default_family != 'default':
 167                 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 168             if font_osf == 'true':
 169                 document.warning("Ignoring `\\font_osf = true'")
 170             return
 171     font_scheme = 'default'
 172     document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
 173     if fonts['roman'] == 'cmr':
 174         document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
 175         if font_osf == 'true':
 176             document.preamble.append('\\usepackage{eco}')
 177             font_osf = 'false'
 178     for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
 179         if fonts['roman'] == font:
 180             document.preamble.append('\\usepackage{%s}' % font)
 181     for font in 'cmss', 'lmss', 'cmbr':
 182         if fonts['sans'] == font:
 183             document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
 184     for font in 'berasans':
 185         if fonts['sans'] == font:
 186             document.preamble.append('\\usepackage{%s}' % font)
 187     for font in 'cmtt', 'lmtt', 'cmtl':
 188         if fonts['typewriter'] == font:
 189             document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
 190     for font in 'courier', 'beramono', 'luximono':
 191         if fonts['typewriter'] == font:
 192             document.preamble.append('\\usepackage{%s}' % font)
 193     if font_default_family != 'default':
 194         document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
 195     if font_osf == 'true':
 196         document.warning("Ignoring `\\font_osf = true'")
 197
 198
 199 def revert_booktabs(document):
 200     " We remove the booktabs flag or everything else will become a mess. "
 201     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 202     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 203     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 204     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 205     i = 0
 206     while 1:
 207         i = find_token(document.body, "\\begin_inset Tabular", i)
 208         if i == -1:
 209             return
 210         j = find_end_of_inset(document.body, i + 1)
 211         if j == -1:
 212             document.warning("Malformed LyX document: Could not find end of tabular.")
 213             continue
 214         for k in range(i, j):
 215             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 216                 document.warning("Converting 'booktabs' table to normal table.")
 217                 document.body[k] = document.body[k].replace(' booktabs="true"', '')
 218             if re.search(re_row, document.body[k]):
 219                 document.warning("Removing extra row space.")
 220                 document.body[k] = re_tspace.sub('', document.body[k])
 221                 document.body[k] = re_bspace.sub('', document.body[k])
 222                 document.body[k] = re_ispace.sub('', document.body[k])
 223         i = i + 1
 224
 225
 226 def convert_multiencoding(document, forward):
 227     """ Fix files with multiple encodings.
 228 Files with an inputencoding of "auto" or "default" and multiple languages
 229 where at least two languages have different default encodings are encoded
 230 in multiple encodings for file formats < 249. These files are incorrectly
 231 read and written (as if the whole file was in the encoding of the main
 232 language).
 233 This is not true for files written by CJK-LyX, they are always in the locale
 234 encoding.
 235
 236 This function
 237 - converts from fake unicode values to true unicode if forward is true, and
 238 - converts from true unicode values to fake unicode if forward is false.
 239 document.encoding must be set to the old value (format 248) in both cases.
 240
 241 We do this here and not in LyX.py because it is far easier to do the
 242 necessary parsing in modern formats than in ancient ones.
 243 """
 244     if document.cjk_encoding != '':
 245         return
 246     encoding_stack = [document.encoding]
 247     lang_re = re.compile(r"^\\lang\s(\S+)")
 248     if document.inputencoding == "auto" or document.inputencoding == "default":
 249         for i in range(len(document.body)):
 250             result = lang_re.match(document.body[i])
 251             if result:
 252                 language = result.group(1)
 253                 if language == "default":
 254                     document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
 255                     encoding_stack[-1] = document.encoding
 256                 else:
 257                     from lyx2lyx_lang import lang
 258                     document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
 259                     encoding_stack[-1] = lang[language][3]
 260             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 261                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 262                 encoding_stack.append(encoding_stack[-1])
 263             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 264                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 265                 if len(encoding_stack) == 1:
 266                     # Don't remove the document encoding from the stack
 267                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 268                 else:
 269                     del encoding_stack[-1]
 270             if encoding_stack[-1] != document.encoding:
 271                 if forward:
 272                     # This line has been incorrectly interpreted as if it was
 273                     # encoded in 'encoding'.
 274                     # Convert back to the 8bit string that was in the file.
 275                     orig = document.body[i].encode(document.encoding)
 276                     # Convert the 8bit string that was in the file to unicode
 277                     # with the correct encoding.
 278                     document.body[i] = orig.decode(encoding_stack[-1])
 279                 else:
 280                     # Convert unicode to the 8bit string that will be written
 281                     # to the file with the correct encoding.
 282                     orig = document.body[i].encode(encoding_stack[-1])
 283                     # Convert the 8bit string that will be written to the
 284                     # file to fake unicode with the encoding that will later
 285                     # be used when writing to the file.
 286                     document.body[i] = orig.decode(document.encoding)
 287
 288
 289 def convert_utf8(document):
 290     " Set document encoding to UTF-8. "
 291     convert_multiencoding(document, True)
 292     document.encoding = "utf8"
 293
 294
 295 def revert_utf8(document):
 296     " Set document encoding to the value corresponding to inputencoding. "
 297     i = find_token(document.header, "\\inputencoding", 0)
 298     if i == -1:
 299         document.header.append("\\inputencoding auto")
 300     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 301         document.header[i] = "\\inputencoding auto"
 302     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 303     document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
 304     convert_multiencoding(document, False)
 305
 306
 307 def revert_cs_label(document):
 308     " Remove status flag of charstyle label. "
 309     i = 0
 310     while 1:
 311         i = find_token(document.body, "\\begin_inset CharStyle", i)
 312         if i == -1:
 313             return
 314         # Seach for a line starting 'show_label'
 315         # If it is not there, break with a warning message
 316         i = i + 1
 317         while 1:
 318             if (document.body[i][:10] == "show_label"):
 319                 del document.body[i]
 320                 break
 321             elif (document.body[i][:13] == "\\begin_layout"):
 322                 document.warning("Malformed LyX document: Missing 'show_label'.")
 323                 break
 324             i = i + 1
 325
 326         i = i + 1
 327
 328
 329 def convert_bibitem(document):
 330     """ Convert
 331 \bibitem [option]{argument}
 332
 333 to
 334
 335 \begin_inset LatexCommand bibitem
 336 label "option"
 337 key "argument"
 338
 339 \end_inset
 340
 341 This must be called after convert_commandparams.
 342 """
 343     regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
 344     i = 0
 345     while 1:
 346         i = find_token(document.body, "\\bibitem", i)
 347         if i == -1:
 348             break
 349         match = re.match(regex, document.body[i])
 350         option = match.group(1)
 351         argument = match.group(2)
 352         lines = ['\\begin_inset LatexCommand bibitem']
 353         if option != None:
 354             lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
 355         lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
 356         lines.append('')
 357         lines.append('\\end_inset')
 358         document.body[i:i+1] = lines
 359         i = i + 1
 360
 361
 362 commandparams_info = {
 363     # command : [option1, option2, argument]
 364     "bibitem" : ["label", "", "key"],
 365     "bibtex" : ["options", "btprint", "bibfiles"],
 366     "cite"        : ["after", "before", "key"],
 367     "citet"       : ["after", "before", "key"],
 368     "citep"       : ["after", "before", "key"],
 369     "citealt"     : ["after", "before", "key"],
 370     "citealp"     : ["after", "before", "key"],
 371     "citeauthor"  : ["after", "before", "key"],
 372     "citeyear"    : ["after", "before", "key"],
 373     "citeyearpar" : ["after", "before", "key"],
 374     "citet*"      : ["after", "before", "key"],
 375     "citep*"      : ["after", "before", "key"],
 376     "citealt*"    : ["after", "before", "key"],
 377     "citealp*"    : ["after", "before", "key"],
 378     "citeauthor*" : ["after", "before", "key"],
 379     "Citet"       : ["after", "before", "key"],
 380     "Citep"       : ["after", "before", "key"],
 381     "Citealt"     : ["after", "before", "key"],
 382     "Citealp"     : ["after", "before", "key"],
 383     "Citeauthor"  : ["after", "before", "key"],
 384     "Citet*"      : ["after", "before", "key"],
 385     "Citep*"      : ["after", "before", "key"],
 386     "Citealt*"    : ["after", "before", "key"],
 387     "Citealp*"    : ["after", "before", "key"],
 388     "Citeauthor*" : ["after", "before", "key"],
 389     "citefield"   : ["after", "before", "key"],
 390     "citetitle"   : ["after", "before", "key"],
 391     "cite*"       : ["after", "before", "key"],
 392     "hfill" : ["", "", ""],
 393     "index"      : ["", "", "name"],
 394     "printindex" : ["", "", "name"],
 395     "label" : ["", "", "name"],
 396     "eqref"     : ["name", "", "reference"],
 397     "pageref"   : ["name", "", "reference"],
 398     "prettyref" : ["name", "", "reference"],
 399     "ref"       : ["name", "", "reference"],
 400     "vpageref"  : ["name", "", "reference"],
 401     "vref"      : ["name", "", "reference"],
 402     "tableofcontents" : ["", "", "type"],
 403     "htmlurl" : ["name", "", "target"],
 404     "url"     : ["name", "", "target"]}
 405
 406
def convert_commandparams(document):
    r""" Convert

 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
 \end_inset

 to

 \begin_inset LatexCommand cmdname
 name1 "opt1"
 name2 "opt2"
 name3 "arg"
 \end_inset

 name1, name2 and name3 can be different for each command.

 The names are looked up in commandparams_info.  Double quotes inside a
 value are written as \".  An option or argument for which the table
 holds an empty name is dropped with a warning.
"""
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # 26 == len("\\begin_inset LatexCommand "): everything after the
        # token and its trailing blank is the old-style command text.
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        # Parser state: "WS" (between parts), "CMDNAME", "OPTION",
        # "SECOPTION" or "CONTENT".
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        # Previous character; a '[' directly after a ']' starts the
        # second option.
        b = 0
        for c in command:
            # The three checks below may change the state before the
            # character is stored, so their order matters.
            # 1. A blank, '[' or '{' ends the command name.
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            # 2. A closing delimiter ends the current part, unless it
            #    closes a nested pair.
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            # 3. An opening delimiter inside a part opens a nested pair.
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            # Store the character in the part that is being read, or, in
            # state "WS", see whether it starts a new part.  The starting
            # character itself ('\\', '[' or '{') is not stored.
            if state == "CMDNAME":
                    name += c
            elif state == "OPTION":
                    option1 += c
            elif state == "SECOPTION":
                    option2 += c
            elif state == "CONTENT":
                    argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        # NOTE(review): commandparams_info[name] raises KeyError when an
        # option or argument was parsed for a command that is not in the
        # table -- confirm whether such commands can occur here.
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
        # Only the \begin_inset line is replaced; the following lines of
        # the inset stay as they are.
        document.body[i:i+1] = lines
        i = i + 1
 502         i = i + 1
 503
 504
 505 def revert_commandparams(document):
 506     regex = re.compile(r'(\S+)\s+(.+)')
 507     i = 0
 508     while 1:
 509         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 510         if i == -1:
 511             break
 512         name = document.body[i].split()[2]
 513         j = find_end_of_inset(document.body, i + 1)
 514         preview_line = ""
 515         option1 = ""
 516         option2 = ""
 517         argument = ""
 518         for k in range(i + 1, j):
 519             match = re.match(regex, document.body[k])
 520             if match:
 521                 pname = match.group(1)
 522                 pvalue = match.group(2)
 523                 if pname == "preview":
 524                     preview_line = document.body[k]
 525                 elif (commandparams_info[name][0] != "" and
 526                       pname == commandparams_info[name][0]):
 527                     option1 = pvalue.strip('"').replace('\\"', '"')
 528                 elif (commandparams_info[name][1] != "" and
 529                       pname == commandparams_info[name][1]):
 530                     option2 = pvalue.strip('"').replace('\\"', '"')
 531                 elif (commandparams_info[name][2] != "" and
 532                       pname == commandparams_info[name][2]):
 533                     argument = pvalue.strip('"').replace('\\"', '"')
 534             elif document.body[k].strip() != "":
 535                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
 536         if name == "bibitem":
 537             if option1 == "":
 538                 lines = ["\\bibitem {%s}" % argument]
 539             else:
 540                 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
 541         else:
 542             if option1 == "":
 543                 if option2 == "":
 544                     lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
 545                 else:
 546                     lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
 547             else:
 548                 if option2 == "":
 549                     lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
 550                 else:
 551                     lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
 552         if name != "bibitem":
 553             if preview_line != "":
 554                 lines.append(preview_line)
 555             lines.append('')
 556             lines.append('\\end_inset')
 557         document.body[i:j+1] = lines
 558         i = j + 1
 559
 560
 561 def revert_nomenclature(document):
 562     " Convert nomenclature entry to ERT. "
 563     regex = re.compile(r'(\S+)\s+(.+)')
 564     i = 0
 565     use_nomencl = 0
 566     while 1:
 567         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 568         if i == -1:
 569             break
 570         use_nomencl = 1
 571         j = find_end_of_inset(document.body, i + 1)
 572         preview_line = ""
 573         symbol = ""
 574         description = ""
 575         prefix = ""
 576         for k in range(i + 1, j):
 577             match = re.match(regex, document.body[k])
 578             if match:
 579                 name = match.group(1)
 580                 value = match.group(2)
 581                 if name == "preview":
 582                     preview_line = document.body[k]
 583                 elif name == "symbol":
 584                     symbol = value.strip('"').replace('\\"', '"')
 585                 elif name == "description":
 586                     description = value.strip('"').replace('\\"', '"')
 587                 elif name == "prefix":
 588                     prefix = value.strip('"').replace('\\"', '"')
 589             elif document.body[k].strip() != "":
 590                 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
 591         if prefix == "":
 592             command = 'nomenclature{%s}{%s}' % (symbol, description)
 593         else:
 594             command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
 595         document.body[i:j+1] = ['\\begin_inset ERT',
 596                                 'status collapsed',
 597                                 '',
 598                                 '\\begin_layout %s' % document.default_layout,
 599                                 '',
 600                                 '',
 601                                 '\\backslash',
 602                                 command,
 603                                 '\\end_layout',
 604                                 '',
 605                                 '\\end_inset']
 606         i = i + 11
 607     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 608         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 609         document.preamble.append('\\makenomenclature')
 610
 611
 612 def revert_printnomenclature(document):
 613     " Convert printnomenclature to ERT. "
 614     regex = re.compile(r'(\S+)\s+(.+)')
 615     i = 0
 616     use_nomencl = 0
 617     while 1:
 618         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 619         if i == -1:
 620             break
 621         use_nomencl = 1
 622         j = find_end_of_inset(document.body, i + 1)
 623         preview_line = ""
 624         labelwidth = ""
 625         for k in range(i + 1, j):
 626             match = re.match(regex, document.body[k])
 627             if match:
 628                 name = match.group(1)
 629                 value = match.group(2)
 630                 if name == "preview":
 631                     preview_line = document.body[k]
 632                 elif name == "labelwidth":
 633                     labelwidth = value.strip('"').replace('\\"', '"')
 634             elif document.body[k].strip() != "":
 635                 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
 636         if labelwidth == "":
 637             command = 'nomenclature{}'
 638         else:
 639             command = 'nomenclature[%s]' % labelwidth
 640         document.body[i:j+1] = ['\\begin_inset ERT',
 641                                 'status collapsed',
 642                                 '',
 643                                 '\\begin_layout %s' % document.default_layout,
 644                                 '',
 645                                 '',
 646                                 '\\backslash',
 647                                 command,
 648                                 '\\end_layout',
 649                                 '',
 650                                 '\\end_inset']
 651         i = i + 11
 652     if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
 653         document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
 654         document.preamble.append('\\makenomenclature')
 655
 656
 657 def convert_esint(document):
 658     " Add \\use_esint setting to header. "
 659     i = find_token(document.header, "\\cite_engine", 0)
 660     if i == -1:
 661         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 662         return
 663     # 0 is off, 1 is auto, 2 is on.
 664     document.header.insert(i, '\\use_esint 0')
 665
 666
 667 def revert_esint(document):
 668     " Remove \\use_esint setting from header. "
 669     i = find_token(document.header, "\\use_esint", 0)
 670     if i == -1:
 671         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 672         return
 673     use_esint = document.header[i].split()[1]
 674     del document.header[i]
 675     # 0 is off, 1 is auto, 2 is on.
 676     if (use_esint == 2):
 677         document.preamble.append('\\usepackage{esint}')
 678
 679
 680 def revert_clearpage(document):
 681     " clearpage -> ERT "
 682     i = 0
 683     while 1:
 684         i = find_token(document.body, "\\clearpage", i)
 685         if i == -1:
 686             break
 687         document.body[i:i+1] =  ['\\begin_inset ERT',
 688                                 'status collapsed',
 689                                 '',
 690                                 '\\begin_layout %s' % document.default_layout,
 691                                 '',
 692                                 '',
 693                                 '\\backslash',
 694                                 'clearpage',
 695                                 '\\end_layout',
 696                                 '',
 697                                 '\\end_inset']
 698     i = i + 1
 699
 700
 701 def revert_cleardoublepage(document):
 702     " cleardoublepage -> ERT "
 703     i = 0
 704     while 1:
 705         i = find_token(document.body, "\\cleardoublepage", i)
 706         if i == -1:
 707             break
 708         document.body[i:i+1] =  ['\\begin_inset ERT',
 709                                 'status collapsed',
 710                                 '',
 711                                 '\\begin_layout %s' % document.default_layout,
 712                                 '',
 713                                 '',
 714                                 '\\backslash',
 715                                 'cleardoublepage',
 716                                 '\\end_layout',
 717                                 '',
 718                                 '\\end_inset']
 719     i = i + 1
 720
 721
 722 def convert_lyxline(document):
 723     " remove fontsize commands for \lyxline "
 724     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 725     # to change the line thickness. The new definiton does this so that imported
 726     # \lyxlines would have a different line thickness. The eventual fontsize command
 727     # before \lyxline is therefore removed to get the same output.
 728     fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
 729                  "large", "Large", "LARGE", "huge", "Huge"]
 730     for n in range(0, len(fontsizes)):
 731         i = 0
 732         k = 0
 733         while i < len(document.body):
 734             i = find_token(document.body, "\\size " + fontsizes[n], i)
 735             k = find_token(document.body, "\\lyxline", i)
 736             # the corresponding fontsize command is always 2 lines before the \lyxline
 737             if (i != -1 and k == i+2):
 738                 document.body[i:i+1] = []
 739             else:
 740                 break
 741         i = i + 1
 742
 743
 744 def revert_encodings(document):
 745     " Set new encodings to auto. "
 746     encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
 747                  "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
 748                  "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
 749     i = find_token(document.header, "\\inputencoding", 0)
 750     if i == -1:
 751         document.header.append("\\inputencoding auto")
 752     else:
 753         inputenc = get_value(document.header, "\\inputencoding", i)
 754         if inputenc in encodings:
 755             document.header[i] = "\\inputencoding auto"
 756     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 757
 758
 759 def convert_caption(document):
 760     " Convert caption layouts to caption insets. "
 761     i = 0
 762     while 1:
 763         i = find_token(document.body, "\\begin_layout Caption", i)
 764         if i == -1:
 765             return
 766         j = find_end_of_layout(document.body, i)
 767         if j == -1:
 768             document.warning("Malformed LyX document: Missing `\\end_layout'.")
 769             return
 770
 771         document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
 772         document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
 773                             "\\begin_inset Caption", "",
 774                             "\\begin_layout %s" % document.default_layout]
 775         i = i + 1
 776
 777
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    # Every caption inset is turned into a paragraph of its own that uses
    # the Caption layout. The paragraph that contains the inset is ended in
    # front of the inset and restarted behind it, unless the inset is at the
    # very start or end of that paragraph.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the layout line, it may be needed to restart the layout
        # behind the caption.
        layout_line = document.body[layout_before]
        # The inset is in the first position if there are only empty lines
        # between the \begin_layout line and the inset.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            # The inset moved to the position of the deleted layout line
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            # Skip the two inserted lines, so that i is the inset again
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is at the paragraph end if there are only empty lines
        # between the \end_inset line and the \end_layout line.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions are done from back (k) to front (j, i), so that the
        # smaller indices stay valid. An empty line that follows a deleted
        # line is removed as well.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
 850
 851
 852 # Accents of InsetLaTeXAccent
 853 accent_map = {
 854     "`" : u'\u0300', # grave
 855     "'" : u'\u0301', # acute
 856     "^" : u'\u0302', # circumflex
 857     "~" : u'\u0303', # tilde
 858     "=" : u'\u0304', # macron
 859     "u" : u'\u0306', # breve
 860     "." : u'\u0307', # dot above
 861     "\"": u'\u0308', # diaresis
 862     "r" : u'\u030a', # ring above
 863     "H" : u'\u030b', # double acute
 864     "v" : u'\u030c', # caron
 865     "b" : u'\u0320', # minus sign below
 866     "d" : u'\u0323', # dot below
 867     "c" : u'\u0327', # cedilla
 868     "k" : u'\u0328', # ogonek
 869     "t" : u'\u0361'  # tie. This is special: It spans two characters, but
 870                      # only one is given as argument, so we don't need to
 871                      # treat it differently.
 872 }
 873
 874
 875 # special accents of InsetLaTeXAccent without argument
 876 special_accent_map = {
 877     'i' : u'\u0131', # dotless i
 878     'j' : u'\u0237', # dotless j
 879     'l' : u'\u0142', # l with stroke
 880     'L' : u'\u0141'  # L with stroke
 881 }
 882
 883
 884 # special accent arguments of InsetLaTeXAccent
 885 accented_map = {
 886     '\\i' : u'\u0131', # dotless i
 887     '\\j' : u'\u0237'  # dotless j
 888 }
 889
 890
 891 def _convert_accent(accent, accented_char):
 892     type = accent
 893     char = accented_char
 894     if char == '':
 895         if type in special_accent_map:
 896             return special_accent_map[type]
 897         # a missing char is treated as space by LyX
 898         char = ' '
 899     elif type == 'q' and char in ['t', 'd', 'l', 'L']:
 900         # Special caron, only used with t, d, l and L.
 901         # It is not in the map because we convert it to the same unicode
 902         # character as the normal caron: \q{} is only defined if babel with
 903         # the czech or slovak language is used, and the normal caron
 904         # produces the correct output if the T1 font encoding is used.
 905         # For the same reason we never convert to \q{} in the other direction.
 906         type = 'v'
 907     elif char in accented_map:
 908         char = accented_map[char]
 909     elif (len(char) > 1):
 910         # We can only convert accents on a single char
 911         return ''
 912     a = accent_map.get(type)
 913     if a:
 914         return unicodedata.normalize("NFKC", "%s%s" % (char, a))
 915     return ''
 916
 917
 918 def convert_ertbackslash(body, i, ert, default_layout):
 919     r""" -------------------------------------------------------------------------------------------
 920     Convert backslashes and '\n' into valid ERT code, append the converted
 921     text to body[i] and return the (maybe incremented) line index i"""
 922
 923     for c in ert:
 924         if c == '\\':
 925             body[i] = body[i] + '\\backslash '
 926             i = i + 1
 927             body.insert(i, '')
 928         elif c == '\n':
 929             body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
 930             i = i + 4
 931         else:
 932             body[i] = body[i] + c
 933     return i
 934
 935
 936 def convert_accent(document):
 937     # The following forms are supported by LyX:
 938     # '\i \"{a}' (standard form, as written by LyX)
 939     # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
 940     # '\i \"{ }' (also accepted if the accented char is a space)
 941     # '\i \" a'  (also accepted)
 942     # '\i \"'    (also accepted)
 943     re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
 944     re_contents = re.compile(r'^([^\s{]+)(.*)$')
 945     re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
 946     i = 0
 947     while 1:
 948         i = find_re(document.body, re_wholeinset, i)
 949         if i == -1:
 950             return
 951         match = re_wholeinset.match(document.body[i])
 952         prefix = match.group(1)
 953         contents = match.group(3).strip()
 954         match = re_contents.match(contents)
 955         if match:
 956             # Strip first char (always \)
 957             accent = match.group(1)[1:]
 958             accented_contents = match.group(2).strip()
 959             match = re_accentedcontents.match(accented_contents)
 960             accented_char = match.group(1)
 961             converted = _convert_accent(accent, accented_char)
 962             if converted == '':
 963                 # Normalize contents
 964                 contents = '%s{%s}' % (accent, accented_char),
 965             else:
 966                 document.body[i] = '%s%s' % (prefix, converted)
 967                 i += 1
 968                 continue
 969         document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
 970         document.body[i] = prefix
 971         document.body[i+1:i+1] = ['\\begin_inset ERT',
 972                                   'status collapsed',
 973                                   '',
 974                                   '\\begin_layout %s' % document.default_layout,
 975                                   '',
 976                                   '',
 977                                   '']
 978         i = convert_ertbackslash(document.body, i + 7,
 979                                  '\\%s' % contents,
 980                                  document.default_layout)
 981         document.body[i+1:i+1] = ['\\end_layout',
 982                                   '',
 983                                   '\\end_inset']
 984         i += 3
 985
 986
 987 def revert_accent(document):
 988     inverse_accent_map = {}
 989     for k in accent_map:
 990         inverse_accent_map[accent_map[k]] = k
 991     inverse_special_accent_map = {}
 992     for k in special_accent_map:
 993         inverse_special_accent_map[special_accent_map[k]] = k
 994     inverse_accented_map = {}
 995     for k in accented_map:
 996         inverse_accented_map[accented_map[k]] = k
 997
 998     # Since LyX may insert a line break within a word we must combine all
 999     # words before unicode normalization.
1000     # We do this only if the next line starts with an accent, otherwise we
1001     # would create things like '\begin_inset ERTstatus'.
1002     numberoflines = len(document.body)
1003     for i in range(numberoflines-1):
1004         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1005             continue
1006         if (document.body[i+1][0] in inverse_accent_map):
1007             # the last character of this line and the first of the next line
1008             # form probably a surrogate pair.
1009             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1010                 document.body[i] += document.body[i+1][0]
1011                 document.body[i+1] = document.body[i+1][1:]
1012
1013     # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1014     # This is needed to catch all accented characters.
1015     for i in range(numberoflines):
1016         # Unfortunately we have a mixture of unicode strings and plain strings,
1017         # because we never use u'xxx' for string literals, but 'xxx'.
1018         # Therefore we may have to try two times to normalize the data.
1019         try:
1020             document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1021         except TypeError:
1022             document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1023
1024     # Replace accented characters with InsetLaTeXAccent
1025     # Do not convert characters that can be represented in the chosen
1026     # encoding.
1027     encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1028     lang_re = re.compile(r"^\\lang\s(\S+)")
1029     for i in range(len(document.body)):
1030
1031         if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1032             # Track the encoding of the current line
1033             result = lang_re.match(document.body[i])
1034             if result:
1035                 language = result.group(1)
1036                 if language == "default":
1037                     encoding_stack[-1] = document.encoding
1038                 else:
1039                     from lyx2lyx_lang import lang
1040                     encoding_stack[-1] = lang[language][3]
1041                 continue
1042             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1043                 encoding_stack.append(encoding_stack[-1])
1044                 continue
1045             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1046                 del encoding_stack[-1]
1047                 continue
1048
1049         for j in range(len(document.body[i])):
1050             # dotless i and dotless j are both in special_accent_map and can
1051             # occur as an accented character, so we need to test that the
1052             # following character is no accent
1053             if (document.body[i][j] in inverse_special_accent_map and
1054                 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1055                 accent = document.body[i][j]
1056                 try:
1057                     dummy = accent.encode(encoding_stack[-1])
1058                 except UnicodeEncodeError:
1059                     # Insert the rest of the line as new line
1060                     if j < len(document.body[i]) - 1:
1061                         document.body[i+1:i+1] = document.body[i][j+1:]
1062                     # Delete the accented character
1063                     if j > 0:
1064                         document.body[i] = document.body[i][:j-1]
1065                     else:
1066                         document.body[i] = u''
1067                     # Finally add the InsetLaTeXAccent
1068                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1069                     break
1070             elif j > 0 and document.body[i][j] in inverse_accent_map:
1071                 accented_char = document.body[i][j-1]
1072                 if accented_char == ' ':
1073                     # Conform to LyX output
1074                     accented_char = ''
1075                 elif accented_char in inverse_accented_map:
1076                     accented_char = inverse_accented_map[accented_char]
1077                 accent = document.body[i][j]
1078                 try:
1079                     dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1080                 except UnicodeEncodeError:
1081                     # Insert the rest of the line as new line
1082                     if j < len(document.body[i]) - 1:
1083                         document.body[i+1:i+1] = document.body[i][j+1:]
1084                     # Delete the accented characters
1085                     if j > 1:
1086                         document.body[i] = document.body[i][:j-2]
1087                     else:
1088                         document.body[i] = u''
1089                     # Finally add the InsetLaTeXAccent
1090                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1091                     break
1092     # Normalize to "Normal form C" (NFC, pre-composed characters) again
1093     for i in range(numberoflines):
1094         document.body[i] = unicodedata.normalize("NFKC", document.body[i])
1095
1096
def normalize_font_whitespace(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside.

    Nothing is done if the document backend is not latex. The document body
    is modified in place. """

    if document.backend != "latex":
        return

    lines = document.body

    # The font properties that are tracked, together with the value that
    # resets each of them
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    # The properties that are currently set to a non-reset value
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            # NOTE(review): lines[i-1] is the last line if i == 0, and
            # lines[i+1] / lines[i+2] below are not checked against the end
            # of the body. Presumably a font change is never the first or
            # one of the last body lines -- confirm.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # added_lines is built around the space: the resets of the
                # other changed properties are put in front of it, the
                # lines that set them again behind it.
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                # skip the added lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 2
                        continue
                lines[i+1] = lines[i+1][1:]
                # Same construction as above: resets, space, properties
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                # skip the added lines
                i = i + len(added_lines)

        i = i + 1
1185
1186         i = i + 1
1187
1188
def revert_utf8x(document):
    " Replace the utf8x encoding by utf8. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        # No encoding given at all: add the setting
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "utf8x":
        header[pos] = "\\inputencoding utf8"
    # Keep the document attribute in sync with the header
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1199
1200
def revert_utf8plain(document):
    " Replace the utf8-plain encoding by utf8. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8-plain":
            header[pos] = "\\inputencoding utf8"
    else:
        # No encoding given at all: add the setting
        header.append("\\inputencoding auto")
    # Keep the document attribute in sync with the header
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1211
1212
def convert_changes(document):
    " Set output_changes to false if tracking_changes is false. "
    header = document.header
    tracking_pos = find_token(header, '\\tracking_changes', 0)
    if tracking_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    output_pos = find_token(header, '\\output_changes', 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_off = get_value(header, "\\tracking_changes", tracking_pos) == "false"
    output_on = get_value(header, "\\output_changes", output_pos) == "true"
    if tracking_off and output_on:
        header[output_pos] = "\\output_changes false"
1227
1228
def revert_ascii(document):
    " Replace the ascii encoding by auto. "
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        # No encoding given at all: add the setting
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) == "ascii":
        header[pos] = "\\inputencoding auto"
    # Keep the document attribute in sync with the header
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1239
1240
def normalize_language_name(document):
    " Rename the languages brazil to brazilian and portuges to portuguese. "
    lang = { "brazil": "brazilian",
             "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # If the header has no \language line, i is -1 and the assignment
        # would overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
1249
1250
def revert_language_name(document):
    " Rename the languages brazilian to brazil and portuguese to portuges. "
    lang = { "brazilian": "brazil",
             "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # If the header has no \language line, i is -1 and the assignment
        # would overwrite the last header line.
        if i != -1:
            document.header[i] = "\\language %s" % document.language

1259
1260 #
1261 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " Rename the text class cv to simplecv. "
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1265
1266
def revert_cv_textclass(document):
    " Rename the text class simplecv back to cv. "
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1270
1271
def convert_tableborder(document):
    """ Remove the "|" in front of ">{" from table cells with a left line.

    The problem is: LyX doubles the table cell border as it ignores the "|"
    character in the cell arguments. A fix takes care of this and therefore
    the "|" has to be removed. """
    for i, line in enumerate(document.body):
        # the two tokens have to be in one line
        if "leftline=\"true\"" not in line:
            continue
        k = line.find("|>{")
        if k != -1:
            # delete the "|", but keep the rest of the line completely
            # (including its last character)
            document.body[i] = line[:k] + line[k+1:]
1284
1285
def revert_tableborder(document):
    """ Insert a "|" in front of the first ">{" of each line that also
    contains leftline="true". """
    body = document.body
    for idx, line in enumerate(body):
        # the two tokens have to be in one line
        if "leftline=\"true\"" not in line:
            continue
        pos = line.find(">{")
        if pos == -1:
            continue
        # add the "|"
        body[idx] = line[:pos] + '|' + line[pos:]
1296
1297
def revert_armenian(document):
    """ Revert the armenian support: The armscii8 encoding is set to auto.
    Armenian documents load armtex in the preamble and get the language
    english. """

    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        pos = find_token(document.header, "\\inputencoding", 0)
        if pos != -1:
            document.header[pos] = "\\inputencoding auto"

    if document.language != "armenian":
        return

    # The preamble is regarded as existing if one of its lines contains
    # a `\' or a `%'
    preamble_exists = False
    for line in document.preamble:
        if "\\" in line or "%" in line:
            preamble_exists = True
            break
    # add the entry \usepackage{armtex} to the document preamble
    if preamble_exists:
        # set the armtex entry as the first preamble line
        document.preamble[0:0] = ["\\usepackage{armtex}"]
    else:
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')

    # Set document language from armenian to english
    document.language = "english"
    pos = find_token(document.header, "\\language", 0)
    if pos != -1:
        document.header[pos] = "\\language english"
1328
1329
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    cjk_encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos == -1:
        # No encoding given at all: add the setting
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", pos) in cjk_encodings:
        header[pos] = "\\inputencoding default"
    # Keep the document attribute in sync with the header
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    cjk_languages = ("chinese-simplified", "chinese-traditional",
                     "japanese", "korean")
    if document.language in cjk_languages:
        document.language = "english"
        pos = find_token(header, "\\language", 0)
        if pos != -1:
            header[pos] = "\\language english"
1350
1351
def revert_preamble_listings_params(document):
    r""" Revert preamble option \listings_params.

    The setting is removed from the header. Its value is given to \lstset
    in the preamble instead, and the listings package is loaded. """
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    # Split off the token only: the parameters may contain whitespace,
    # which must not truncate them.
    fields = document.header[i].split(None, 1)
    params = ''
    if len(fields) > 1:
        params = fields[1].strip().strip('"')
    document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % params)
    del document.header[i]
1359
1360
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
FROM

\begin_inset
lstparams "language=Delphi"
inline true
status open

\begin_layout Standard
var i = 10;
\end_layout

\end_inset

TO

\begin_inset ERT
status open
\begin_layout Standard


\backslash
lstinline[language=Delphi]{var i = 10;}
\end_layout

\end_inset

The listings package is added to the preamble if a listings inset is found.
'''
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset listings', i)
        if i == -1:
            break
        if not '\\usepackage{listings}' in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        inline = 'false'
        params = ''
        status = 'open'
        inlinecode = ''
        # k is the first line behind the inset settings. It is set for
        # each inset, so that a missing status line can neither leave it
        # undefined nor reuse the value of a previous inset.
        k = i + 1
        # first three lines (but do not read beyond the end of the inset)
        for line in range(i + 1, min(i + 4, j)):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
                k = line + 1
            if document.body[line].startswith('lstparams'):
                # Split off the token only: the parameters may contain
                # whitespace, which must not truncate them.
                fields = document.body[line].split(None, 1)
                if len(fields) > 1:
                    params = fields[1].strip().strip('"')
                k = line + 1
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
                k = line + 1
        # looking for the oneline code for lstinline
        for line in range(i + 2, j + 1):
            if document.body[line].startswith(r'\end_layout'):
                inlinecode = document.body[line - 1]
                break
        if len(params) > 0:
            params = '[%s]' % params
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout Standard',
                                      '',
                                      '',
                                      r'\backslash',
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
        else:
            # The paragraphs of the inset (lines k to j - 2) are kept
            # between the \begin{lstlisting} and \end{lstlisting} paragraphs
            document.body[i: j+1] =  [r'\begin_inset ERT',
                                      'status %s' % status,
                                      '',
                                      r'\begin_layout Standard',
                                      '',
                                      '',
                                      r'\backslash',
                                      r'begin{lstlisting}%s' % params,
                                      r'\end_layout'
                                    ] + document.body[k : j - 1] + \
                                     ['',
                                      r'\begin_layout Standard',
                                      '',
                                      r'\backslash',
                                      'end{lstlisting}',
                                      r'\end_layout',
                                      '',
                                      r'\end_inset']
1451
1452
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
preview false

\end_inset

TO

\begin_inset ERT
status open

\begin_layout Standard


\backslash
lstinputlisting{file}[opt]
\end_layout

\end_inset

\usepackage{listings} is appended to document.preamble when such an inset
is found and the line is not there yet.  document.body is modified in
place; nothing is returned.
    '''

    i = 0
    while True:
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        if i == -1:
            break
        if '\\usepackage{listings}' not in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # The command is everything after "\begin_inset Include ".  Splitting
        # at most twice keeps whitespace inside the file name or the options
        # (e.g. caption={My code}) instead of cutting the command short.
        cmd = document.body[i].split(None, 2)[2].rstrip()
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    'status open',
                                    '',
                                    r'\begin_layout Standard',
                                    '',
                                    '',
                                    r'\backslash',
                                    # the leading backslash is emitted above
                                    # as a separate \backslash line
                                    cmd[1:],
                                    r'\end_layout',
                                    '',
                                    r'\end_inset']
1500
1501
##
# Conversion hub
#
# Both tables are lists of [number, [functions]] pairs.  The numbers are
# presumably LyX file format numbers and the functions the steps needed to
# reach that format -- confirm against the code that consumes these tables.
# "convert" is ordered by ascending number, "revert" by descending number.
#

supported_versions = [
    "1.5.0",
    "1.5",
]

convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
    [256, []],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
    [259, [convert_accent, normalize_font_whitespace]],
    [260, []],
    [261, [convert_changes]],
    [262, []],
    [263, [normalize_language_name]],
    [264, [convert_cv_textclass]],
    [265, [convert_tableborder]],
    [266, []],
    [267, []],
    [268, []],
    [269, []],
]

revert = [
    [268, [revert_preamble_listings_params,
           revert_listings_inset,
           revert_include_listings]],
    [267, [revert_CJK]],
    [266, [revert_utf8plain]],
    [265, [revert_armenian]],
    [264, [revert_tableborder]],
    [263, [revert_cv_textclass]],
    [262, [revert_language_name]],
    [261, [revert_ascii]],
    [260, []],
    [259, [revert_utf8x]],
    [258, []],
    [257, []],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_accent, revert_utf8]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]


# Running this module as a script does nothing.
if __name__ == "__main__":
    pass
1561
1562