lib/scripts/layout2layout.py

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 # file layout2layout.py
   5 # This file is part of LyX, the document processor.
   6 # Licence details can be found in the file COPYING.
   7
   8 # author Georg Baum
   9
  10 # Full author contact details are available in file CREDITS
  11
  12 # This script will update a .layout file to current format
  13
  14
  15 import os, re, string, sys
  16
  17 # Incremented to format 4, 6 April 2007, lasgouttes
  18 # Introduction of generic "Provides" declaration
  19
  20 # Incremented to format 5, 22 August 2007 by vermeer
  21 # InsetLayout material
  22
  23 # Incremented to format 6, 7 January 2008 by spitz
  24 # Requires tag added to layout files
  25
  26 # Incremented to format 7, 24 March 2008 by rgh
  27 # AddToPreamble tag added to layout files
  28
  29 # Incremented to format 8, 25 July 2008 by rgh
  30 # UseModule tag added to layout files
  31 # CopyStyle added to InsetLayout
  32
  33 # Incremented to format 9, 5 October 2008 by rgh
  34 # ForcePlain and CustomPars tags added to InsetLayout
  35
  36 # Incremented to format 10, 6 October 2008 by rgh
  37 # Change format of counters
  38
  39 # Incremented to format 11, 14 October 2008 by rgh
  40 # Add ProvidesModule, ExcludesModule tags
  41
  42 # Incremented to format 12, 10 January 2009 by gb
  43 # Add I18NPreamble tag
  44
  45 # Incremented to format 13, 5 February 2009 by rgh
  46 # Add InToc tag for InsetLayout
  47
  48 # Incremented to format 14, 14 February 2009 by gb
  49 # Rename I18NPreamble to BabelPreamble and add LangPreamble
  50
  51 # Incremented to format 15, 28 May 2009 by lasgouttes
  52 # Add new tag OutputFormat; modules can be conditioned on feature
  53 # "from->to".
  54
  55 # Incremented to format 16, 5 June 2009 by rgh
  56 # Add new tags for Text Class:
  57 #   HTMLPreamble, HTMLAddToPreamble
  58 # For Layout:
  59 #   HTMLTag, HTMLAttr, HTMLLabel, HTMLLabelAttr, HTMLItem, HTMLItemAttr
  60 #   HTMLStyle, and HTMLPreamble
  61 # For InsetLayout:
  62 #   HTMLTag, HTMLAttr, HTMLStyle, and HTMLPreamble
  63 # For Floats:
  64 #   HTMLType, HTMLClass, HTMLStyle
  65 # These are still to be documented, once everything stabilizes.
  66
  67 # Incremented to format 17, 12 August 2009 by rgh
  68 # Add IfStyle and IfCounter tags for layout.
  69
  70 # Incremented to format 18, 27 October 2009 by rgh
  71 # Added some new tags for HTML output. Documentation still to follow.
  72
  73 # Incremented to format 19, 17 November 2009 by rgh
  74 # Added InPreamble tag.
  75
  76 # Incremented to format 20, 17 December 2009 by rgh
  77 # Added ContentAsLabel tag.
  78
  79 # Do not forget to document format change in Customization
  80 # Manual (section "Declaring a new text class").
  81
# Newest layout file format this script produces: convert() upgrades a file
# by one format per call and main() keeps calling it until this value is
# reached.
currentFormat = 20
  83
  84
  85 def usage(prog_name):
  86     return ("Usage: %s inputfile outputfile\n" % prog_name +
  87             "or     %s <inputfile >outputfile" % prog_name)
  88
  89
  90 def error(message):
  91     sys.stderr.write(message + '\n')
  92     sys.exit(1)
  93
  94
  95 def trim_bom(line):
  96     " Remove byte order mark."
  97     if line[0:3] == "\357\273\277":
  98         return line[3:]
  99     else:
 100         return line
 101
 102
 103 def read(source):
 104     " Read input file and strip lineendings."
 105     lines = source.read().splitlines()
 106     lines[0] = trim_bom(lines[0])
 107     return lines
 108
 109
 110 def write(output, lines):
 111     " Write output file with native lineendings."
 112     output.write(os.linesep.join(lines) + os.linesep)
 113
 114
 115 # Concatenates old and new in an intelligent way:
 116 # If old is wrapped in ", they are stripped. The result is wrapped in ".
 117 def concatenate_label(old, new):
 118     # Don't use strip as long as we support python 1.5.2
 119     if old[0] == '"':
 120         return old[0:-1] + new + '"'
 121     else:
 122         return '"' + old + new + '"'
 123
 124 # appends a string to a list unless it's already there
 125 def addstring(s, l):
 126     if l.count(s) > 0:
 127         return
 128     l.append(s)
 129
 130
 131 def convert(lines):
 132     " Convert to new format."
 133     re_Comment = re.compile(r'^(\s*)#')
 134     re_Counter = re.compile(r'\s*Counter\s*', re.IGNORECASE)
 135     re_Name = re.compile(r'\s*Name\s+(\S+)\s*', re.IGNORECASE)
 136     re_UseMod = re.compile(r'^\s*UseModule\s+(.*)', re.IGNORECASE)
 137     re_Empty = re.compile(r'^(\s*)$')
 138     re_Format = re.compile(r'^(\s*)(Format)(\s+)(\S+)', re.IGNORECASE)
 139     re_Preamble = re.compile(r'^(\s*)Preamble', re.IGNORECASE)
 140     re_EndPreamble = re.compile(r'^(\s*)EndPreamble', re.IGNORECASE)
 141     re_LangPreamble = re.compile(r'^(\s*)LangPreamble', re.IGNORECASE)
 142     re_EndLangPreamble = re.compile(r'^(\s*)EndLangPreamble', re.IGNORECASE)
 143     re_BabelPreamble = re.compile(r'^(\s*)BabelPreamble', re.IGNORECASE)
 144     re_EndBabelPreamble = re.compile(r'^(\s*)EndBabelPreamble', re.IGNORECASE)
 145     re_MaxCounter = re.compile(r'^(\s*)(MaxCounter)(\s+)(\S+)', re.IGNORECASE)
 146     re_LabelType = re.compile(r'^(\s*)(LabelType)(\s+)(\S+)', re.IGNORECASE)
 147     re_LabelString = re.compile(r'^(\s*)(LabelString)(\s+)(("[^"]+")|(\S+))', re.IGNORECASE)
 148     re_LabelStringAppendix = re.compile(r'^(\s*)(LabelStringAppendix)(\s+)(("[^"]+")|(\S+))', re.IGNORECASE)
 149     re_LatexType = re.compile(r'^(\s*)(LatexType)(\s+)(\S+)', re.IGNORECASE)
 150     re_Style = re.compile(r'^(\s*)(Style)(\s+)(\S+)', re.IGNORECASE)
 151     re_CopyStyle = re.compile(r'^(\s*)(CopyStyle)(\s+)(\S+)', re.IGNORECASE)
 152     re_NoStyle = re.compile(r'^(\s*)(NoStyle)(\s+)(\S+)', re.IGNORECASE)
 153     re_End = re.compile(r'^(\s*)(End)(\s*)$', re.IGNORECASE)
 154     re_Provides = re.compile(r'^(\s*)Provides(\S+)(\s+)(\S+)', re.IGNORECASE)
 155     re_CharStyle = re.compile(r'^(\s*)CharStyle(\s+)(\S+)$', re.IGNORECASE)
 156     re_AMSMaths = re.compile(r'^\s*Input ams(?:math|def)s.inc\s*')
 157     re_AMSMathsPlain = re.compile(r'^\s*Input amsmaths-plain.inc\s*')
 158     re_AMSMathsSeq = re.compile(r'^\s*Input amsmaths-seq.inc\s*')
 159     re_TocLevel = re.compile(r'^(\s*)(TocLevel)(\s+)(\S+)', re.IGNORECASE)
 160     re_I18nPreamble = re.compile(r'^(\s*)I18nPreamble', re.IGNORECASE)
 161     re_EndI18nPreamble = re.compile(r'^(\s*)EndI18nPreamble', re.IGNORECASE)
 162
 163     # counters for sectioning styles (hardcoded in 1.3)
 164     counters = {"part"          : "\\Roman{part}",
 165                 "chapter"       : "\\arabic{chapter}",
 166                 "section"       : "\\arabic{section}",
 167                 "subsection"    : "\\arabic{section}.\\arabic{subsection}",
 168                 "subsubsection" : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}",
 169                 "paragraph"     : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}",
 170                 "subparagraph"  : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}.\\arabic{subparagraph}"}
 171
 172     # counters for sectioning styles in appendix (hardcoded in 1.3)
 173     appendixcounters = {"chapter"       : "\\Alph{chapter}",
 174                         "section"       : "\\Alph{section}",
 175                         "subsection"    : "\\arabic{section}.\\arabic{subsection}",
 176                         "subsubsection" : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}",
 177                         "paragraph"     : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}",
 178                         "subparagraph"  : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}.\\arabic{subparagraph}"}
 179
 180     # Value of TocLevel for sectioning styles
 181     toclevels = {"part"          : 0,
 182                  "chapter"       : 0,
 183                  "section"       : 1,
 184                  "subsection"    : 2,
 185                  "subsubsection" : 3,
 186                  "paragraph"     : 4,
 187                  "subparagraph"  : 5}
 188
 189     i = 0
 190     only_comment = 1
 191     counter = ""
 192     toclevel = ""
 193     label = ""
 194     labelstring = ""
 195     labelstringappendix = ""
 196     space1 = ""
 197     labelstring_line = -1
 198     labelstringappendix_line = -1
 199     labeltype_line = -1
 200     latextype = ""
 201     latextype_line = -1
 202     style = ""
 203     maxcounter = 0
 204     format = 1
 205     formatline = 0
 206     usemodules = []
 207
 208     while i < len(lines):
 209         # Skip comments and empty lines
 210         if re_Comment.match(lines[i]) or re_Empty.match(lines[i]):
 211             i += 1
 212             continue
 213
 214         # insert file format if not already there
 215         if (only_comment):
 216             match = re_Format.match(lines[i])
 217             if match:
 218                 formatline = i
 219                 format = int(match.group(4))
 220                 if format > 1 and format < currentFormat:
 221                     lines[i] = "Format %d" % (format + 1)
 222                     only_comment = 0
 223                 elif format == currentFormat:
 224                     # nothing to do
 225                     return format
 226                 else:
 227                     error('Cannot convert file format %s' % format)
 228             else:
 229                 lines.insert(i, "Format 2")
 230                 only_comment = 0
 231                 continue
 232
 233         # Don't get confused by LaTeX code
 234         if re_Preamble.match(lines[i]):
 235             i += 1
 236             while i < len(lines) and not re_EndPreamble.match(lines[i]):
 237                 i += 1
 238             continue
 239         if re_LangPreamble.match(lines[i]):
 240             i += 1
 241             while i < len(lines) and not re_EndLangPreamble.match(lines[i]):
 242                 i += 1
 243             continue
 244         if re_BabelPreamble.match(lines[i]):
 245             i += 1
 246             while i < len(lines) and not re_EndBabelPreamble.match(lines[i]):
 247                 i += 1
 248             continue
 249
 250         # This just involved new features, not any changes to old ones
 251         if format >= 14 and format <= 19:
 252           i += 1
 253           continue
 254
 255         # Rename I18NPreamble to BabelPreamble
 256         if format == 13:
 257             match = re_I18nPreamble.match(lines[i])
 258             if match:
 259                 lines[i] = match.group(1) + "BabelPreamble"
 260                 i += 1
 261                 match = re_EndI18nPreamble.match(lines[i])
 262                 while i < len(lines) and not match:
 263                     i += 1
 264                     match = re_EndI18nPreamble.match(lines[i])
 265                 lines[i] = match.group(1) + "EndBabelPreamble"
 266                 i += 1
 267                 continue
 268
 269         # These just involved new features, not any changes to old ones
 270         if format == 11 or format == 12:
 271           i += 1
 272           continue
 273
 274         if format == 10:
 275             match = re_UseMod.match(lines[i])
 276             if match:
 277                 module = match.group(1)
 278                 lines[i] = "DefaultModule " + module
 279             i += 1
 280             continue
 281
 282         if format == 9:
 283             match = re_Counter.match(lines[i])
 284             if match:
 285                 counterline = i
 286                 i += 1
 287                 while i < len(lines):
 288                     namem = re_Name.match(lines[i])
 289                     if namem:
 290                         name = namem.group(1)
 291                         lines.pop(i)
 292                         lines[counterline] = "Counter %s" % name
 293                         # we don't need to increment i
 294                         continue
 295                     endem = re_End.match(lines[i])
 296                     if endem:
 297                         i += 1
 298                         break
 299                     i += 1
 300             i += 1
 301             continue
 302
 303         if format == 8:
 304             # We want to scan for ams-type includes and, if we find them,
 305             # add corresponding UseModule tags to the layout.
 306             match = re_AMSMaths.match(lines[i])
 307             if match:
 308                 addstring("theorems-ams", usemodules)
 309                 addstring("theorems-ams-extended", usemodules)
 310                 addstring("theorems-sec", usemodules)
 311                 lines.pop(i)
 312                 continue
 313             match = re_AMSMathsPlain.match(lines[i])
 314             if match:
 315                 addstring("theorems-starred", usemodules)
 316                 lines.pop(i)
 317                 continue
 318             match = re_AMSMathsSeq.match(lines[i])
 319             if match:
 320                 addstring("theorems-ams", usemodules)
 321                 addstring("theorems-ams-extended", usemodules)
 322                 lines.pop(i)
 323                 continue
 324             i += 1
 325             continue
 326
 327         # These just involved new features, not any changes to old ones
 328         if format >= 5 and format <= 7:
 329           i += 1
 330           continue
 331
 332         if format == 4:
 333             # Handle conversion to long CharStyle names
 334             match = re_CharStyle.match(lines[i])
 335             if match:
 336                 lines[i] = "InsetLayout CharStyle:%s" % (match.group(3))
 337                 i += 1
 338                 lines.insert(i, "\tLyXType charstyle")
 339                 i += 1
 340                 lines.insert(i, "")
 341                 lines[i] = "\tLabelString %s" % (match.group(3))
 342             i += 1
 343             continue
 344
 345         if format == 3:
 346             # convert 'providesamsmath x',  'providesmakeidx x',  'providesnatbib x',  'providesurl x' to
 347             #         'provides amsmath x', 'provides makeidx x', 'provides natbib x', 'provides url x'
 348             # x is either 0 or 1
 349             match = re_Provides.match(lines[i])
 350             if match:
 351                 lines[i] = "%sProvides %s%s%s" % (match.group(1), match.group(2).lower(),
 352                                                   match.group(3), match.group(4))
 353             i += 1
 354             continue
 355
 356         if format == 2:
 357             caption = []
 358
 359             # delete caption styles
 360             match = re_Style.match(lines[i])
 361             if match:
 362                 style = string.lower(match.group(4))
 363                 if style == "caption":
 364                     del lines[i]
 365                     while i < len(lines) and not re_End.match(lines[i]):
 366                         caption.append(lines[i])
 367                         del lines[i]
 368                     if i == len(lines):
 369                         error('Incomplete caption style.')
 370                     else:
 371                         del lines[i]
 372                         continue
 373
 374             # delete undefinition of caption styles
 375             match = re_NoStyle.match(lines[i])
 376             if match:
 377                 style = string.lower(match.group(4))
 378                 if style == "caption":
 379                     del lines[i]
 380                     continue
 381
 382             # replace the CopyStyle statement with the definition of the real
 383             # style. This may result in duplicate statements, but that is OK
 384             # since the second one will overwrite the first one.
 385             match = re_CopyStyle.match(lines[i])
 386             if match:
 387                 style = string.lower(match.group(4))
 388                 if style == "caption":
 389                     if len(caption) > 0:
 390                         lines[i:i+1] = caption
 391                     else:
 392                         # FIXME: This style comes from an include file, we
 393                         # should replace the real style and not this default.
 394                         lines[i:i+1] = ['       Margin                First_Dynamic',
 395                                         '       LatexType             Command',
 396                                         '       LatexName             caption',
 397                                         '       NeedProtect           1',
 398                                         '       LabelSep              xx',
 399                                         '       ParSkip               0.4',
 400                                         '       TopSep                0.5',
 401                                         '       Align                 Center',
 402                                         '       AlignPossible         Center',
 403                                         '       LabelType             Sensitive',
 404                                         '       LabelString           "Senseless!"',
 405                                         '       OptionalArgs          1',
 406                                         '       LabelFont',
 407                                         '         Series              Bold',
 408                                         '       EndFont']
 409
 410             i += 1
 411             continue
 412
 413         # Delete MaxCounter and remember the value of it
 414         match = re_MaxCounter.match(lines[i])
 415         if match:
 416             level = match.group(4)
 417             if string.lower(level) == "counter_chapter":
 418                 maxcounter = 0
 419             elif string.lower(level) == "counter_section":
 420                 maxcounter = 1
 421             elif string.lower(level) == "counter_subsection":
 422                 maxcounter = 2
 423             elif string.lower(level) == "counter_subsubsection":
 424                 maxcounter = 3
 425             elif string.lower(level) == "counter_paragraph":
 426                 maxcounter = 4
 427             elif string.lower(level) == "counter_subparagraph":
 428                 maxcounter = 5
 429             elif string.lower(level) == "counter_enumi":
 430                 maxcounter = 6
 431             elif string.lower(level) == "counter_enumii":
 432                 maxcounter = 7
 433             elif string.lower(level) == "counter_enumiii":
 434                 maxcounter = 8
 435             del lines[i]
 436             continue
 437
 438         # Replace line
 439         #
 440         # LabelType Counter_EnumI
 441         #
 442         # with two lines
 443         #
 444         # LabelType Counter
 445         # LabelCounter EnumI
 446         #
 447         match = re_LabelType.match(lines[i])
 448         if match:
 449             label = match.group(4)
 450             # Remember indenting space for later reuse in added lines
 451             space1 = match.group(1)
 452             # Remember the line for adding the LabelCounter later.
 453             # We can't do it here because it could shift latextype_line etc.
 454             labeltype_line = i
 455             if string.lower(label[:8]) == "counter_":
 456                 counter = string.lower(label[8:])
 457                 lines[i] = re_LabelType.sub(r'\1\2\3Counter', lines[i])
 458
 459         # Remember the LabelString line
 460         match = re_LabelString.match(lines[i])
 461         if match:
 462             labelstring = match.group(4)
 463             labelstring_line = i
 464
 465         # Remember the LabelStringAppendix line
 466         match = re_LabelStringAppendix.match(lines[i])
 467         if match:
 468             labelstringappendix = match.group(4)
 469             labelstringappendix_line = i
 470
 471         # Remember the LatexType line
 472         match = re_LatexType.match(lines[i])
 473         if match:
 474             latextype = string.lower(match.group(4))
 475             latextype_line = i
 476
 477         # Remember the TocLevel line
 478         match = re_TocLevel.match(lines[i])
 479         if match:
 480             toclevel = string.lower(match.group(4))
 481
 482         # Reset variables at the beginning of a style definition
 483         match = re_Style.match(lines[i])
 484         if match:
 485             style = string.lower(match.group(4))
 486             counter = ""
 487             toclevel = ""
 488             label = ""
 489             space1 = ""
 490             labelstring = ""
 491             labelstringappendix = ""
 492             labelstring_line = -1
 493             labelstringappendix_line = -1
 494             labeltype_line = -1
 495             latextype = ""
 496             latextype_line = -1
 497
 498         if re_End.match(lines[i]):
 499
 500             # Add a line "LatexType Bib_Environment" if LabelType is Bibliography
 501             # (or change the existing LatexType)
 502             if string.lower(label) == "bibliography":
 503                 if (latextype_line < 0):
 504                     lines.insert(i, "%sLatexType Bib_Environment" % space1)
 505                     i += 1
 506                 else:
 507                     lines[latextype_line] = re_LatexType.sub(r'\1\2\3Bib_Environment', lines[latextype_line])
 508
 509             # Change "LabelType Static" to "LabelType Itemize" for itemize environments
 510             if latextype == "item_environment" and string.lower(label) == "static":
 511                 lines[labeltype_line] = re_LabelType.sub(r'\1\2\3Itemize', lines[labeltype_line])
 512
 513             # Change "LabelType Counter_EnumI" to "LabelType Enumerate" for enumerate environments
 514             if latextype == "item_environment" and string.lower(label) == "counter_enumi":
 515                 lines[labeltype_line] = re_LabelType.sub(r'\1\2\3Enumerate', lines[labeltype_line])
 516                 # Don't add the LabelCounter line later
 517                 counter = ""
 518
 519             # Replace
 520             #
 521             # LabelString "Chapter"
 522             #
 523             # with
 524             #
 525             # LabelString "Chapter \arabic{chapter}"
 526             #
 527             # if this style has a counter. Ditto for LabelStringAppendix.
 528             # This emulates the hardcoded article style numbering of 1.3
 529             #
 530             if counter != "":
 531                 if counters.has_key(style):
 532                     if labelstring_line < 0:
 533                         lines.insert(i, '%sLabelString "%s"' % (space1, counters[style]))
 534                         i += 1
 535                     else:
 536                         new_labelstring = concatenate_label(labelstring, counters[style])
 537                         lines[labelstring_line] = re_LabelString.sub(
 538                                 r'\1\2\3%s' % new_labelstring.replace("\\", "\\\\"),
 539                                 lines[labelstring_line])
 540                 if appendixcounters.has_key(style):
 541                     if labelstringappendix_line < 0:
 542                         lines.insert(i, '%sLabelStringAppendix "%s"' % (space1, appendixcounters[style]))
 543                         i += 1
 544                     else:
 545                         new_labelstring = concatenate_label(labelstring, appendixcounters[style])
 546                         lines[labelstringappendix_line] = re_LabelStringAppendix.sub(
 547                                 r'\1\2\3%s' % new_labelstring.replace("\\", "\\\\"),
 548                                 lines[labelstringappendix_line])
 549
 550                 # Now we can safely add the LabelCounter line
 551                 lines.insert(labeltype_line + 1, "%sLabelCounter %s" % (space1, counter))
 552                 i += 1
 553
 554             # Add the TocLevel setting for sectioning styles
 555             if toclevel == "" and toclevels.has_key(style) and maxcounter <= toclevels[style]:
 556                 lines.insert(i, '%s\tTocLevel %d' % (space1, toclevels[style]))
 557                 i += 1
 558
 559         i += 1
 560
 561     if usemodules:
 562         i = formatline + 1
 563         for mod in usemodules:
 564             lines.insert(i, "UseModule " + mod)
 565             i += 1
 566
 567     return format + 1
 568
 569
 570 def main(argv):
 571
 572     # Open files
 573     if len(argv) == 1:
 574         source = sys.stdin
 575         output = sys.stdout
 576     elif len(argv) == 3:
 577         source = open(argv[1], 'rb')
 578         output = open(argv[2], 'wb')
 579     else:
 580         error(usage(argv[0]))
 581
 582     # Do the real work
 583     lines = read(source)
 584     format = 1
 585     while (format < currentFormat):
 586         format = convert(lines)
 587     write(output, lines)
 588
 589     # Close files
 590     if len(argv) == 3:
 591         source.close()
 592         output.close()
 593
 594     return 0
 595
 596
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv)