3 # file spreadsheet_to_docbook.py
4 # This file is part of LyX, the document processor.
5 # Licence details can be found in the file COPYING.
7 # author Thibaut Cuvelier & Kornel Benko
9 # Full author contact details are available in file CREDITS.
11 """Reformat the output of ssconvert for a single spreadsheet to match the needs
12 of the DocBook 5 table format.
14 Expects to read from file specified by sys.argv[1]
15 and output to the file specified by sys.argv[2]
def process_file(contents):
    """Convert the XHTML emitted by ssconvert into a DocBook table.

    Only the first table found inside ``<body>`` is kept. Inline HTML
    formatting tags are mapped to DocBook ``<emphasis>``/``<phrase>``
    elements, and the table is renamed to ``<informaltable>`` when it has
    no ``<caption>``.

    Args:
        contents: The complete XHTML document, as a string.

    Returns:
        The DocBook markup for the first table, as a string.

    Raises:
        IndexError: If ``contents`` contains no ``<body>`` tag.
        AssertionError: If a ``<font`` tag is left after the colour
            conversion (i.e. a font attribute this script does not handle).
    """
    # Scrap the header and the footer.
    contents = contents.split("<body>")[1]
    contents = contents.split("</body>")[0]

    # Gnumeric may generate more than one table, just take the first one.
    contents = contents.split("</table>")[0] + "\n</table>"

    # Convert the rest of the table to DocBook.
    replacements = (
        ("<p></p>", ""),
        ("<i>", "<emphasis>"),
        ("</i>", "</emphasis>"),
        ("<b>", "<emphasis role='bold'>"),
        ("</b>", "</emphasis>"),
        ("<u>", "<emphasis role='underline'>"),
        ("</u>", "</emphasis>"),
    )
    for html_tag, docbook_tag in replacements:
        contents = contents.replace(html_tag, docbook_tag)

    # Match the colour value with [^"]* rather than a greedy .* so that
    # several <font> tags on the same line are converted one by one instead
    # of being swallowed into a single (broken) match.
    contents = re.sub(r'<font color="([^"]*)">', r"<phrase role='color \1'>", contents)

    # Any remaining <font ...> tag carries attributes that are not handled
    # here; converting its closing tag below would produce invalid XML.
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
    if '<font' in contents:
        raise AssertionError(
            "Unsupported <font> tag left in the table; "
            "implement something to catch these cases.")
    contents = contents.replace("</font>", "</phrase>")

    # If the table has a caption, then the right tag is <table>.
    # Otherwise, it's <informaltable>.
    if '<caption>' not in contents:
        contents = contents.replace("<table", "<informaltable")
        contents = contents.replace("</table>", "</informaltable>")

    # Return the processed string.
    contents = contents.replace("\n\n", "\n")
    return contents
if __name__ == "__main__":
    # Script entry point. Two modes:
    #   * no argument: filter mode, XHTML is read from stdin and the
    #     DocBook table is printed to stdout;
    #   * two arguments: sys.argv[1] is the spreadsheet handed to ssconvert,
    #     sys.argv[2] is the file receiving the DocBook table.
    if len(sys.argv) == 1:
        # Read from stdin, output to stdout.
        print(process_file(sys.stdin.read()))
    elif len(sys.argv) == 3:
        # Local import: only this branch needs it.
        import io

        # Read from output of ssconvert ("fd://1" makes it export to the
        # pipe connected to its standard output).
        proc = subprocess.Popen(
            ["ssconvert", "--export-type=Gnumeric_html:xhtml", sys.argv[1], "fd://1"],
            stdout=subprocess.PIPE)
        # communicate() drains the pipe and waits for the child, so no
        # zombie process is left behind and the exit status is available.
        raw_output, _ = proc.communicate()
        if proc.returncode != 0:
            sys.exit("ssconvert failed with exit code %d" % proc.returncode)

        # The pipe yields bytes; process_file() works on text, so decode
        # first. NOTE(review): assumes the XHTML export is UTF-8 - confirm.
        docbook = process_file(raw_output.decode("utf-8"))

        # Write through a context manager so the file is flushed and closed,
        # instead of rebinding sys.stdout for the rest of the process.
        with io.open(sys.argv[2], "w", encoding="utf-8") as output_file:
            output_file.write(docbook + u"\n")
    else:
        # Script name, file to process, output file.
        sys.exit("Usage: %s [spreadsheet_file output_file]" % sys.argv[0])