From: Uwe Stöhr Date: Sun, 18 May 2008 19:08:23 +0000 (+0000) Subject: csv2lyx.py: new csv2lyx version by Hartmut and José X-Git-Tag: 1.6.10~4759 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=58220e118def0a9104c34892ba0c310e59b82e7f;p=features.git csv2lyx.py: new csv2lyx version by Hartmut and José git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@24818 a592a061-630c-0410-9148-cb99ea01b6c8 --- diff --git a/lib/scripts/csv2lyx.py b/lib/scripts/csv2lyx.py index f0ea2bfe1c..7fe4bf7da7 100644 --- a/lib/scripts/csv2lyx.py +++ b/lib/scripts/csv2lyx.py @@ -6,70 +6,100 @@ # Licence details can be found in the file COPYING. # author Hartmut Haase +# author José Matos # Full author contact details are available in file CREDITS # This script reads a csv-table (file name.csv) and converts it into # a LyX-table for versions 1.5.0 and higher (LyX table format 276). +# It uses Python's csv module for parsing. # The original csv2lyx was witten by Antonio Gulino # in Perl for LyX 1.x and modified for LyX table format 276 by the author. # - - -import os, re, string, sys, unicodedata +import csv, unicodedata +import os, sys +import optparse def error(message): sys.stderr.write(message + '\n') sys.exit(1) # processing command line options -if len(sys.argv) == 1 or sys.argv[1] == '--help': - print '''Usage: - csv2lyx [options] mycsvfile mytmptable.lyx +# delegate this to standard module optparse +args = {} +args["usage"] = "Usage: csv2lyx [options] mycsvfile mytmptable.lyx" -This script creates a LyX document containing a table +args["description"] = """This script creates a LyX document containing a table from a comma-separated-value file. The LyX file has format 276 and can be opened with LyX 1.5.0 and newer. - -Options: - -s separator column separator, default is Tab - --help usage instructions - -Remarks: - If your .csv file contains special characters (e. g. umlauts, +""" +parser = optparse.OptionParser(**args) + +parser.set_defaults(excel = 'n', column_sep = 'n', guess_sep = False) +parser.add_option("-e", "--excel", + help="""'character' Excel type, default is 'n' + 'e': Excel-generated CSV file + 't': Excel-generated TAB-delimited CSV file""") +parser.add_option("-s", "--separator", dest="column_sep", + help= "column separator, default is ','") +parser.add_option("-g", "--guess-sep", action="store_true", + help = "guess the columns separator") + +group = optparse.OptionGroup(parser, "Remarks", """If your .csv file contains special characters (e. g. umlauts, accented letters, etc.) make sure it is coded in UTF-8 (unicode). - Else LyX will loose some cell contents.''' - sys.exit(0) - -# print len(sys.argv), sys.argv -separator = '\t' -infile = "" -if len(sys.argv) == 3: - infile = sys.argv[1] - outfile = sys.argv[2] -elif len(sys.argv) == 5: - infile = sys.argv[3] - outfile = sys.argv[4] - if sys.argv[1] == '-s': - separator = sys.argv[2] + Else LyX will loose some cell contents. If your .csv file was not written according to the "Common Format and MIME Type for Comma-Separated Values (CSV) Files" (http://tools.ietf.org/html/rfc4180) there may be unexpected results.""") +parser.add_option_group(group) + +(options, args) = parser.parse_args() + +# validate input +if len(args) == 1: + infile = args[0] + fout = sys.stdout +elif len(args) ==2: + infile = args[0] + fout = open(args[1], 'w') +else: + parser.print_help() + sys.exit(1) if not os.path.exists(infile): error('File "%s" not found.' % infile) + +dialects = {'n' : None, 'e' : 'excel', 't' : 'excel-tab'} +if options.excel not in dialects: + parser.print_help() + sys.exit(1) +dialect= dialects[options.excel] + +# when no special column separator is given, try to detect it: +if options.column_sep == 'n': + options.guess_sep = 'True' +print options.column_sep, options.guess_sep +if options.guess_sep: + guesser = csv.Sniffer() + input_file = "".join(open(infile,'rb').readlines()) + try: + dialect = guesser.sniff(input_file) + print 'found:', dialect.delimiter + reader = csv.reader(open(infile, "rb"), dialect= dialect) + except: + print 'error, using ,' + reader = csv.reader(open(infile, "rb"), dialect= dialect, delimiter=',') +else: + reader = csv.reader(open(infile, "rb"), dialect= dialect, delimiter=options.column_sep) + # read input -finput = open(infile, 'r') -rowcontent = finput.readlines() -finput.close() -num_rows = len(rowcontent) # number of lines -# print 'num_rows ', num_rows -i = 0 num_cols = 1 # max columns -while i < num_rows: - # print len(rowcontent[i]), ' ', rowcontent[i] - num_cols = max(num_cols, rowcontent[i].count(separator) + 1) - i += 1 -# print num_cols +rows = [] + +for row in reader: + num_cols = max(num_cols, len(row)) + rows.append(row) + +num_rows = reader.line_num # number of lines -fout = open(outfile, 'w') +# create a LyX file ##################### # write first part #################### @@ -125,21 +155,31 @@ while i < num_cols: j = 0 while j < num_rows: fout.write('\n') - row = str(rowcontent[j]) - row = string.split(row,separator) - #print j, ': ' , row + num_cols_2 = len(rows[j]) # columns in current row + #print j, ': ' , rows[j] ############################ # write contents of one line ############################ i = 0 - while i < num_cols: + while i < num_cols_2: fout.write(""" \\begin_inset Text \\begin_layout Standard\n""") - fout.write(row[i].strip('\n')) + #print rows[j][i] + fout.write(rows[j][i]) fout.write('\n\\end_layout\n\n\\end_inset\n\n') i += 1 +# If row has less columns than num_cols + if num_cols_2 < num_cols: + while i < num_cols: + fout.write(""" +\\begin_inset Text + +\\begin_layout Standard\n""") + fout.write(' ') + fout.write('\n\\end_layout\n\n\\end_inset\n\n') + i += 1 fout.write('\n') j += 1 ##################### @@ -154,4 +194,5 @@ fout.write(""" \\end_body \\end_document\n""") +# close the LyX file fout.close()