lib/scripts/layout2layout.py

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 # file layout2layout.py
   5 # This file is part of LyX, the document processor.
   6 # Licence details can be found in the file COPYING.
   7
   8 # author Georg Baum
   9
  10 # Full author contact details are available in file CREDITS
  11
  12 # This script will update a .layout file to current format
  13
  14
  15 import os, re, string, sys
  16
  17 # Incremented to format 4, 6 April 2007, lasgouttes
  18 # Introduction of generic "Provides" declaration
  19
  20 # Incremented to format 5, 22 August 2007 by vermeer
  21 # InsetLayout material
  22
  23 # Incremented to format 6, 7 January 2008 by spitz
  24 # Requires tag added to layout files
  25
  26 # Incremented to format 7, 24 March 2008 by rgh
  27 # AddToPreamble tag added to layout files
  28
  29 # Incremented to format 8, 25 July 2008 by rgh
  30 # UseModule tag added to layout files
  31 # CopyStyle added to InsetLayout
  32
  33 # Incremented to format 9, 5 October 2008 by rgh
  34 # ForcePlain and CustomPars tags added to InsetLayout
  35
  36 # Incremented to format 10, 6 October 2008 by rgh
  37 # Change format of counters
  38
  39 # Incremented to format 11, 14 October 2008 by rgh
  40 # Add ProvidesModule, ExcludesModule tags
  41
  42 # Incremented to format 12, 10 January 2009 by gb
  43 # Add I18NPreamble tag
  44
  45 # Incremented to format 13, 5 February 2009 by rgh
  46 # Add InToc tag for InsetLayout
  47
  48 # Incremented to format 14, 14 February 2009 by gb
  49 # Rename I18NPreamble to BabelPreamble and add LangPreamble
  50
  51 # Incremented to format 15, 28 May 2009 by lasgouttes
  52 # Add new tag OutputFormat; modules can be conditioned on feature
  53 # "from->to".
  54
  55 # Incremented to format 16, 5 June 2009 by rgh
  56 # Add new tags for Text Class:
  57 #   HTMLPreamble, HTMLAddToPreamble
  58 # For Layout:
  59 #   HTMLTag, HTMLAttr, HTMLLabel, HTMLLabelAttr, HTMLItem, HTMLItemAttr
  60 #   HTMLStyle, and HTMLPreamble
  61 # For InsetLayout:
  62 #   HTMLTag, HTMLAttr, HTMLStyle, and HTMLPreamble
  63 # For Floats:
  64 #   HTMLType, HTMLClass, HTMLStyle
  65 # These are still to be documented, once everything stabilizes.
  66
  67 # Incremented to format 17, 12 August 2009 by rgh
  68 # Add IfStyle and IfCounter tags for layout.
  69
  70 # Incremented to format 18, 27 October 2009 by rgh
  71 # Added some new tags for HTML output. Documentation still to follow.
  72
  73 # Do not forget to document format change in Customization
  74 # Manual (section "Declaring a new text class").
  75
# Newest layout file format this script knows how to produce. main() keeps
# calling convert() until the file being processed reaches this format.
currentFormat = 18
  77
  78
  79 def usage(prog_name):
  80     return ("Usage: %s inputfile outputfile\n" % prog_name +
  81             "or     %s <inputfile >outputfile" % prog_name)
  82
  83
  84 def error(message):
  85     sys.stderr.write(message + '\n')
  86     sys.exit(1)
  87
  88
  89 def trim_eol(line):
  90     " Remove end of line char(s)."
  91     if line[-2:-1] == '\r':
  92         return line[:-2]
  93     elif line[-1:] == '\r' or line[-1:] == '\n':
  94         return line[:-1]
  95     else:
  96         # file with no EOL in last line
  97         return line
  98
  99
 100 def trim_bom(line):
 101     " Remove byte order mark."
 102     if line[0:3] == "\357\273\277":
 103         return line[3:]
 104     else:
 105         return line
 106
 107
 108 def read(input):
 109     " Read input file and strip lineendings."
 110     lines = list()
 111     first_line = 1
 112     while 1:
 113         line = input.readline()
 114         if not line:
 115             break
 116         if (first_line):
 117             line = trim_bom(line)
 118             first_line = 0
 119         lines.append(trim_eol(line))
 120     return lines
 121
 122
 123 def write(output, lines):
 124     " Write output file with native lineendings."
 125     for line in lines:
 126         output.write(line + os.linesep)
 127
 128
 129 # Concatenates old and new in an intelligent way:
 130 # If old is wrapped in ", they are stripped. The result is wrapped in ".
 131 def concatenate_label(old, new):
 132     # Don't use strip as long as we support python 1.5.2
 133     if old[0] == '"':
 134         return old[0:-1] + new + '"'
 135     else:
 136         return '"' + old + new + '"'
 137
 138 # appends a string to a list unless it's already there
 139 def addstring(s, l):
 140     if l.count(s) > 0:
 141         return
 142     l.append(s)
 143
 144
 145 def convert(lines):
 146     " Convert to new format."
 147     re_Comment = re.compile(r'^(\s*)#')
 148     re_Counter = re.compile(r'\s*Counter\s*', re.IGNORECASE)
 149     re_Name = re.compile(r'\s*Name\s+(\S+)\s*', re.IGNORECASE)
 150     re_UseMod = re.compile(r'^\s*UseModule\s+(.*)', re.IGNORECASE)
 151     re_Empty = re.compile(r'^(\s*)$')
 152     re_Format = re.compile(r'^(\s*)(Format)(\s+)(\S+)', re.IGNORECASE)
 153     re_Preamble = re.compile(r'^(\s*)Preamble', re.IGNORECASE)
 154     re_EndPreamble = re.compile(r'^(\s*)EndPreamble', re.IGNORECASE)
 155     re_LangPreamble = re.compile(r'^(\s*)LangPreamble', re.IGNORECASE)
 156     re_EndLangPreamble = re.compile(r'^(\s*)EndLangPreamble', re.IGNORECASE)
 157     re_BabelPreamble = re.compile(r'^(\s*)BabelPreamble', re.IGNORECASE)
 158     re_EndBabelPreamble = re.compile(r'^(\s*)EndBabelPreamble', re.IGNORECASE)
 159     re_MaxCounter = re.compile(r'^(\s*)(MaxCounter)(\s+)(\S+)', re.IGNORECASE)
 160     re_LabelType = re.compile(r'^(\s*)(LabelType)(\s+)(\S+)', re.IGNORECASE)
 161     re_LabelString = re.compile(r'^(\s*)(LabelString)(\s+)(("[^"]+")|(\S+))', re.IGNORECASE)
 162     re_LabelStringAppendix = re.compile(r'^(\s*)(LabelStringAppendix)(\s+)(("[^"]+")|(\S+))', re.IGNORECASE)
 163     re_LatexType = re.compile(r'^(\s*)(LatexType)(\s+)(\S+)', re.IGNORECASE)
 164     re_Style = re.compile(r'^(\s*)(Style)(\s+)(\S+)', re.IGNORECASE)
 165     re_CopyStyle = re.compile(r'^(\s*)(CopyStyle)(\s+)(\S+)', re.IGNORECASE)
 166     re_NoStyle = re.compile(r'^(\s*)(NoStyle)(\s+)(\S+)', re.IGNORECASE)
 167     re_End = re.compile(r'^(\s*)(End)(\s*)$', re.IGNORECASE)
 168     re_Provides = re.compile(r'^(\s*)Provides(\S+)(\s+)(\S+)', re.IGNORECASE)
 169     re_CharStyle = re.compile(r'^(\s*)CharStyle(\s+)(\S+)$', re.IGNORECASE)
 170     re_AMSMaths = re.compile(r'^\s*Input ams(?:math|def)s.inc\s*')
 171     re_AMSMathsPlain = re.compile(r'^\s*Input amsmaths-plain.inc\s*')
 172     re_AMSMathsSeq = re.compile(r'^\s*Input amsmaths-seq.inc\s*')
 173     re_TocLevel = re.compile(r'^(\s*)(TocLevel)(\s+)(\S+)', re.IGNORECASE)
 174     re_I18nPreamble = re.compile(r'^(\s*)I18nPreamble', re.IGNORECASE)
 175     re_EndI18nPreamble = re.compile(r'^(\s*)EndI18nPreamble', re.IGNORECASE)
 176
 177     # counters for sectioning styles (hardcoded in 1.3)
 178     counters = {"part"          : "\\Roman{part}",
 179                 "chapter"       : "\\arabic{chapter}",
 180                 "section"       : "\\arabic{section}",
 181                 "subsection"    : "\\arabic{section}.\\arabic{subsection}",
 182                 "subsubsection" : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}",
 183                 "paragraph"     : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}",
 184                 "subparagraph"  : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}.\\arabic{subparagraph}"}
 185
 186     # counters for sectioning styles in appendix (hardcoded in 1.3)
 187     appendixcounters = {"chapter"       : "\\Alph{chapter}",
 188                         "section"       : "\\Alph{section}",
 189                         "subsection"    : "\\arabic{section}.\\arabic{subsection}",
 190                         "subsubsection" : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}",
 191                         "paragraph"     : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}",
 192                         "subparagraph"  : "\\arabic{section}.\\arabic{subsection}.\\arabic{subsubsection}.\\arabic{paragraph}.\\arabic{subparagraph}"}
 193
 194     # Value of TocLevel for sectioning styles
 195     toclevels = {"part"          : 0,
 196                  "chapter"       : 0,
 197                  "section"       : 1,
 198                  "subsection"    : 2,
 199                  "subsubsection" : 3,
 200                  "paragraph"     : 4,
 201                  "subparagraph"  : 5}
 202
 203     i = 0
 204     only_comment = 1
 205     counter = ""
 206     toclevel = ""
 207     label = ""
 208     labelstring = ""
 209     labelstringappendix = ""
 210     space1 = ""
 211     labelstring_line = -1
 212     labelstringappendix_line = -1
 213     labeltype_line = -1
 214     latextype = ""
 215     latextype_line = -1
 216     style = ""
 217     maxcounter = 0
 218     format = 1
 219     formatline = 0
 220     usemodules = []
 221
 222     while i < len(lines):
 223         # Skip comments and empty lines
 224         if re_Comment.match(lines[i]) or re_Empty.match(lines[i]):
 225             i += 1
 226             continue
 227
 228         # insert file format if not already there
 229         if (only_comment):
 230             match = re_Format.match(lines[i])
 231             if match:
 232                 formatline = i
 233                 format = int(match.group(4))
 234                 if format > 1 and format < currentFormat:
 235                     lines[i] = "Format %d" % (format + 1)
 236                     only_comment = 0
 237                 elif format == currentFormat:
 238                     # nothing to do
 239                     return format
 240                 else:
 241                     error('Cannot convert file format %s' % format)
 242             else:
 243                 lines.insert(i, "Format 2")
 244                 only_comment = 0
 245                 continue
 246
 247         # Don't get confused by LaTeX code
 248         if re_Preamble.match(lines[i]):
 249             i += 1
 250             while i < len(lines) and not re_EndPreamble.match(lines[i]):
 251                 i += 1
 252             continue
 253         if re_LangPreamble.match(lines[i]):
 254             i += 1
 255             while i < len(lines) and not re_EndLangPreamble.match(lines[i]):
 256                 i += 1
 257             continue
 258         if re_BabelPreamble.match(lines[i]):
 259             i += 1
 260             while i < len(lines) and not re_EndBabelPreamble.match(lines[i]):
 261                 i += 1
 262             continue
 263
 264         # This just involved new features, not any changes to old ones
 265         if format == 14 or format == 15 or format == 16 or format == 17:
 266           i += 1
 267           continue
 268
 269         # Rename I18NPreamble to BabelPreamble
 270         if format == 13:
 271             match = re_I18nPreamble.match(lines[i])
 272             if match:
 273                 lines[i] = match.group(1) + "BabelPreamble"
 274                 i += 1
 275                 match = re_EndI18nPreamble.match(lines[i])
 276                 while i < len(lines) and not match:
 277                     i += 1
 278                     match = re_EndI18nPreamble.match(lines[i])
 279                 lines[i] = match.group(1) + "EndBabelPreamble"
 280                 i += 1
 281                 continue
 282
 283         # These just involved new features, not any changes to old ones
 284         if format == 11 or format == 12:
 285           i += 1
 286           continue
 287
 288         if format == 10:
 289             match = re_UseMod.match(lines[i])
 290             if match:
 291                 module = match.group(1)
 292                 lines[i] = "DefaultModule " + module
 293             i += 1
 294             continue
 295
 296         if format == 9:
 297             match = re_Counter.match(lines[i])
 298             if match:
 299                 counterline = i
 300                 i += 1
 301                 while i < len(lines):
 302                     namem = re_Name.match(lines[i])
 303                     if namem:
 304                         name = namem.group(1)
 305                         lines.pop(i)
 306                         lines[counterline] = "Counter %s" % name
 307                         # we don't need to increment i
 308                         continue
 309                     endem = re_End.match(lines[i])
 310                     if endem:
 311                         i += 1
 312                         break
 313                     i += 1
 314             i += 1
 315             continue
 316
 317         if format == 8:
 318             # We want to scan for ams-type includes and, if we find them,
 319             # add corresponding UseModule tags to the layout.
 320             match = re_AMSMaths.match(lines[i])
 321             if match:
 322                 addstring("theorems-ams", usemodules)
 323                 addstring("theorems-ams-extended", usemodules)
 324                 addstring("theorems-sec", usemodules)
 325                 lines.pop(i)
 326                 continue
 327             match = re_AMSMathsPlain.match(lines[i])
 328             if match:
 329                 addstring("theorems-starred", usemodules)
 330                 lines.pop(i)
 331                 continue
 332             match = re_AMSMathsSeq.match(lines[i])
 333             if match:
 334                 addstring("theorems-ams", usemodules)
 335                 addstring("theorems-ams-extended", usemodules)
 336                 lines.pop(i)
 337                 continue
 338             i += 1
 339             continue
 340
 341         # These just involved new features, not any changes to old ones
 342         if format >= 5 and format <= 7:
 343           i += 1
 344           continue
 345
 346         if format == 4:
 347             # Handle conversion to long CharStyle names
 348             match = re_CharStyle.match(lines[i])
 349             if match:
 350                 lines[i] = "InsetLayout CharStyle:%s" % (match.group(3))
 351                 i += 1
 352                 lines.insert(i, "\tLyXType charstyle")
 353                 i += 1
 354                 lines.insert(i, "")
 355                 lines[i] = "\tLabelString %s" % (match.group(3))
 356             i += 1
 357             continue
 358
 359         if format == 3:
 360             # convert 'providesamsmath x',  'providesmakeidx x',  'providesnatbib x',  'providesurl x' to
 361             #         'provides amsmath x', 'provides makeidx x', 'provides natbib x', 'provides url x'
 362             # x is either 0 or 1
 363             match = re_Provides.match(lines[i])
 364             if match:
 365                 lines[i] = "%sProvides %s%s%s" % (match.group(1), match.group(2).lower(),
 366                                                   match.group(3), match.group(4))
 367             i += 1
 368             continue
 369
 370         if format == 2:
 371             caption = []
 372
 373             # delete caption styles
 374             match = re_Style.match(lines[i])
 375             if match:
 376                 style = string.lower(match.group(4))
 377                 if style == "caption":
 378                     del lines[i]
 379                     while i < len(lines) and not re_End.match(lines[i]):
 380                         caption.append(lines[i])
 381                         del lines[i]
 382                     if i == len(lines):
 383                         error('Incomplete caption style.')
 384                     else:
 385                         del lines[i]
 386                         continue
 387
 388             # delete undefinition of caption styles
 389             match = re_NoStyle.match(lines[i])
 390             if match:
 391                 style = string.lower(match.group(4))
 392                 if style == "caption":
 393                     del lines[i]
 394                     continue
 395
 396             # replace the CopyStyle statement with the definition of the real
 397             # style. This may result in duplicate statements, but that is OK
 398             # since the second one will overwrite the first one.
 399             match = re_CopyStyle.match(lines[i])
 400             if match:
 401                 style = string.lower(match.group(4))
 402                 if style == "caption":
 403                     if len(caption) > 0:
 404                         lines[i:i+1] = caption
 405                     else:
 406                         # FIXME: This style comes from an include file, we
 407                         # should replace the real style and not this default.
 408                         lines[i:i+1] = ['       Margin                First_Dynamic',
 409                                         '       LatexType             Command',
 410                                         '       LatexName             caption',
 411                                         '       NeedProtect           1',
 412                                         '       LabelSep              xx',
 413                                         '       ParSkip               0.4',
 414                                         '       TopSep                0.5',
 415                                         '       Align                 Center',
 416                                         '       AlignPossible         Center',
 417                                         '       LabelType             Sensitive',
 418                                         '       LabelString           "Senseless!"',
 419                                         '       OptionalArgs          1',
 420                                         '       LabelFont',
 421                                         '         Series              Bold',
 422                                         '       EndFont']
 423
 424             i += 1
 425             continue
 426
 427         # Delete MaxCounter and remember the value of it
 428         match = re_MaxCounter.match(lines[i])
 429         if match:
 430             level = match.group(4)
 431             if string.lower(level) == "counter_chapter":
 432                 maxcounter = 0
 433             elif string.lower(level) == "counter_section":
 434                 maxcounter = 1
 435             elif string.lower(level) == "counter_subsection":
 436                 maxcounter = 2
 437             elif string.lower(level) == "counter_subsubsection":
 438                 maxcounter = 3
 439             elif string.lower(level) == "counter_paragraph":
 440                 maxcounter = 4
 441             elif string.lower(level) == "counter_subparagraph":
 442                 maxcounter = 5
 443             elif string.lower(level) == "counter_enumi":
 444                 maxcounter = 6
 445             elif string.lower(level) == "counter_enumii":
 446                 maxcounter = 7
 447             elif string.lower(level) == "counter_enumiii":
 448                 maxcounter = 8
 449             del lines[i]
 450             continue
 451
 452         # Replace line
 453         #
 454         # LabelType Counter_EnumI
 455         #
 456         # with two lines
 457         #
 458         # LabelType Counter
 459         # LabelCounter EnumI
 460         #
 461         match = re_LabelType.match(lines[i])
 462         if match:
 463             label = match.group(4)
 464             # Remember indenting space for later reuse in added lines
 465             space1 = match.group(1)
 466             # Remember the line for adding the LabelCounter later.
 467             # We can't do it here because it could shift latextype_line etc.
 468             labeltype_line = i
 469             if string.lower(label[:8]) == "counter_":
 470                 counter = string.lower(label[8:])
 471                 lines[i] = re_LabelType.sub(r'\1\2\3Counter', lines[i])
 472
 473         # Remember the LabelString line
 474         match = re_LabelString.match(lines[i])
 475         if match:
 476             labelstring = match.group(4)
 477             labelstring_line = i
 478
 479         # Remember the LabelStringAppendix line
 480         match = re_LabelStringAppendix.match(lines[i])
 481         if match:
 482             labelstringappendix = match.group(4)
 483             labelstringappendix_line = i
 484
 485         # Remember the LatexType line
 486         match = re_LatexType.match(lines[i])
 487         if match:
 488             latextype = string.lower(match.group(4))
 489             latextype_line = i
 490
 491         # Remember the TocLevel line
 492         match = re_TocLevel.match(lines[i])
 493         if match:
 494             toclevel = string.lower(match.group(4))
 495
 496         # Reset variables at the beginning of a style definition
 497         match = re_Style.match(lines[i])
 498         if match:
 499             style = string.lower(match.group(4))
 500             counter = ""
 501             toclevel = ""
 502             label = ""
 503             space1 = ""
 504             labelstring = ""
 505             labelstringappendix = ""
 506             labelstring_line = -1
 507             labelstringappendix_line = -1
 508             labeltype_line = -1
 509             latextype = ""
 510             latextype_line = -1
 511
 512         if re_End.match(lines[i]):
 513
 514             # Add a line "LatexType Bib_Environment" if LabelType is Bibliography
 515             # (or change the existing LatexType)
 516             if string.lower(label) == "bibliography":
 517                 if (latextype_line < 0):
 518                     lines.insert(i, "%sLatexType Bib_Environment" % space1)
 519                     i += 1
 520                 else:
 521                     lines[latextype_line] = re_LatexType.sub(r'\1\2\3Bib_Environment', lines[latextype_line])
 522
 523             # Change "LabelType Static" to "LabelType Itemize" for itemize environments
 524             if latextype == "item_environment" and string.lower(label) == "static":
 525                 lines[labeltype_line] = re_LabelType.sub(r'\1\2\3Itemize', lines[labeltype_line])
 526
 527             # Change "LabelType Counter_EnumI" to "LabelType Enumerate" for enumerate environments
 528             if latextype == "item_environment" and string.lower(label) == "counter_enumi":
 529                 lines[labeltype_line] = re_LabelType.sub(r'\1\2\3Enumerate', lines[labeltype_line])
 530                 # Don't add the LabelCounter line later
 531                 counter = ""
 532
 533             # Replace
 534             #
 535             # LabelString "Chapter"
 536             #
 537             # with
 538             #
 539             # LabelString "Chapter \arabic{chapter}"
 540             #
 541             # if this style has a counter. Ditto for LabelStringAppendix.
 542             # This emulates the hardcoded article style numbering of 1.3
 543             #
 544             if counter != "":
 545                 if counters.has_key(style):
 546                     if labelstring_line < 0:
 547                         lines.insert(i, '%sLabelString "%s"' % (space1, counters[style]))
 548                         i += 1
 549                     else:
 550                         new_labelstring = concatenate_label(labelstring, counters[style])
 551                         lines[labelstring_line] = re_LabelString.sub(
 552                                 r'\1\2\3%s' % new_labelstring.replace("\\", "\\\\"),
 553                                 lines[labelstring_line])
 554                 if appendixcounters.has_key(style):
 555                     if labelstringappendix_line < 0:
 556                         lines.insert(i, '%sLabelStringAppendix "%s"' % (space1, appendixcounters[style]))
 557                         i += 1
 558                     else:
 559                         new_labelstring = concatenate_label(labelstring, appendixcounters[style])
 560                         lines[labelstringappendix_line] = re_LabelStringAppendix.sub(
 561                                 r'\1\2\3%s' % new_labelstring.replace("\\", "\\\\"),
 562                                 lines[labelstringappendix_line])
 563
 564                 # Now we can safely add the LabelCounter line
 565                 lines.insert(labeltype_line + 1, "%sLabelCounter %s" % (space1, counter))
 566                 i += 1
 567
 568             # Add the TocLevel setting for sectioning styles
 569             if toclevel == "" and toclevels.has_key(style) and maxcounter <= toclevels[style]:
 570                 lines.insert(i, '%sTocLevel %d' % (space1, toclevels[style]))
 571                 i += 1
 572
 573         i += 1
 574
 575     if usemodules:
 576         i = formatline + 1
 577         for mod in usemodules:
 578             lines.insert(i, "UseModule " + mod)
 579             i += 1
 580
 581     return format + 1
 582
 583
 584 def main(argv):
 585
 586     # Open files
 587     if len(argv) == 1:
 588         input = sys.stdin
 589         output = sys.stdout
 590     elif len(argv) == 3:
 591         input = open(argv[1], 'rb')
 592         output = open(argv[2], 'wb')
 593     else:
 594         error(usage(argv[0]))
 595
 596     # Do the real work
 597     lines = read(input)
 598     format = 1
 599     while (format < currentFormat):
 600         format = convert(lines)
 601     write(output, lines)
 602
 603     # Close files
 604     if len(argv) == 3:
 605         input.close()
 606         output.close()
 607
 608     return 0
 609
 610
# Run the conversion only when this file is executed as a script; the whole
# command line, including the program name, is passed on to main().
if __name__ == "__main__":
    main(sys.argv)