From: Uwe Stöhr <uwestoehr@web.de>
Date: Sun, 18 May 2008 19:08:23 +0000 (+0000)
Subject: csv2lyx.py: new csv2lyx version by Hartmut and José
X-Git-Tag: 1.6.10~4759
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=58220e118def0a9104c34892ba0c310e59b82e7f;p=features.git

csv2lyx.py: new csv2lyx version by Hartmut and José

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@24818 a592a061-630c-0410-9148-cb99ea01b6c8
---

diff --git a/lib/scripts/csv2lyx.py b/lib/scripts/csv2lyx.py
index f0ea2bfe1c..7fe4bf7da7 100644
--- a/lib/scripts/csv2lyx.py
+++ b/lib/scripts/csv2lyx.py
@@ -6,70 +6,100 @@
 # Licence details can be found in the file COPYING.
 
 # author Hartmut Haase
+# author José Matos
 
 # Full author contact details are available in file CREDITS
 
 # This script reads a csv-table (file name.csv) and converts it into
 # a LyX-table for versions 1.5.0 and higher (LyX table format 276).
+# It uses Python's csv module for parsing.
 # The original csv2lyx was witten by Antonio Gulino <antonio.gulino@tin.it>
 # in Perl for LyX 1.x and modified for LyX table format 276 by the author.
 #
-
-
-import os, re, string, sys, unicodedata
+import csv, unicodedata
+import os, sys
+import optparse
 
 def error(message):
     sys.stderr.write(message + '\n')
     sys.exit(1)
 
 # processing command line options
-if len(sys.argv) == 1 or sys.argv[1] == '--help':
-    print '''Usage:
-   csv2lyx [options] mycsvfile mytmptable.lyx
+# delegate this to standard module optparse
+args = {}
+args["usage"] = "Usage: csv2lyx [options] mycsvfile mytmptable.lyx"
 
-This script creates a LyX document containing a table
+args["description"] = """This script creates a LyX document containing a table
 from a comma-separated-value file. The LyX file has format 276
 and can be opened with LyX 1.5.0 and newer.
-
-Options:
-   -s separator    column separator, default is Tab
-   --help          usage instructions
-
-Remarks:
-   If your .csv file contains special characters (e. g. umlauts,
+"""
+parser = optparse.OptionParser(**args)
+
+parser.set_defaults(excel = 'n', column_sep = 'n', guess_sep = False)
+parser.add_option("-e", "--excel",
+                  help="""'character'  Excel type, default is 'n'
+   		       'e': Excel-generated CSV file
+   		       't': Excel-generated TAB-delimited CSV file""")
+parser.add_option("-s", "--separator", dest="column_sep",
+                  help= "column separator, default is ','")
+parser.add_option("-g", "--guess-sep", action="store_true",
+                  help = "guess the columns separator")
+
+group = optparse.OptionGroup(parser, "Remarks", """If your .csv file contains special characters (e. g. umlauts,
    accented letters, etc.) make sure it is coded in UTF-8 (unicode).
-   Else LyX will loose some cell contents.'''
-    sys.exit(0)
-
-# print len(sys.argv), sys.argv
-separator = '\t'
-infile = ""
-if len(sys.argv) == 3:
-	infile = sys.argv[1]
-	outfile = sys.argv[2]
-elif len(sys.argv) == 5:
-	infile = sys.argv[3]
-	outfile = sys.argv[4]
-	if sys.argv[1] == '-s':
-		separator = sys.argv[2]
+   Else LyX will loose some cell contents. If your .csv file was not written according to the "Common Format and MIME Type for Comma-Separated Values (CSV) Files" (http://tools.ietf.org/html/rfc4180) there may be unexpected results.""")
+parser.add_option_group(group)
+
+(options, args) = parser.parse_args()
+
+# validate input
+if len(args) == 1:
+    infile = args[0]
+    fout = sys.stdout
+elif len(args) ==2:
+    infile = args[0]
+    fout = open(args[1], 'w')
+else:
+    parser.print_help()
+    sys.exit(1)
 
 if not os.path.exists(infile):
 	error('File "%s" not found.' % infile)
+
+dialects = {'n' : None, 'e' : 'excel', 't' : 'excel-tab'}
+if options.excel not in dialects:
+    parser.print_help()
+    sys.exit(1)
+dialect= dialects[options.excel]
+
+# when no special column separator is given, try to detect it:
+if options.column_sep == 'n':
+    options.guess_sep = 'True'
+print options.column_sep, options.guess_sep
+if options.guess_sep:
+    guesser = csv.Sniffer()
+    input_file = "".join(open(infile,'rb').readlines())
+    try:
+        dialect = guesser.sniff(input_file)
+        print 'found:', dialect.delimiter
+        reader = csv.reader(open(infile, "rb"), dialect= dialect)
+    except:
+        print 'error, using ,'
+        reader = csv.reader(open(infile, "rb"), dialect= dialect, delimiter=',')
+else:
+    reader = csv.reader(open(infile, "rb"), dialect= dialect, delimiter=options.column_sep)
+
 # read input
-finput = open(infile, 'r')
-rowcontent = finput.readlines()
-finput.close()
-num_rows = len(rowcontent) # number of lines
-# print 'num_rows ', num_rows
-i = 0
 num_cols = 1 # max columns
-while i < num_rows:
-	# print len(rowcontent[i]), '   ', rowcontent[i]
-	num_cols = max(num_cols, rowcontent[i].count(separator) + 1)
-	i += 1
-# print num_cols
+rows = []
+
+for row in reader:
+    num_cols = max(num_cols, len(row))
+    rows.append(row)
+
+num_rows = reader.line_num # number of lines
 
-fout = open(outfile, 'w')
+# create a LyX file
 #####################
 # write first part
 ####################
@@ -125,21 +155,31 @@ while i < num_cols:
 j = 0
 while j < num_rows:
 	fout.write('<row>\n')
-	row = str(rowcontent[j])
-	row = string.split(row,separator)
-	#print j, ': ' , row
+	num_cols_2 = len(rows[j]) # columns in current row
+	#print j, ': ' , rows[j]
 ############################
 # write contents of one line
 ############################
 	i = 0
-	while i < num_cols:
+	while i < num_cols_2:
 		fout.write("""<cell alignment="left" valignment="top" usebox="none">
 \\begin_inset Text
 
 \\begin_layout Standard\n""")
-		fout.write(row[i].strip('\n'))
+		#print rows[j][i]
+		fout.write(rows[j][i])
 		fout.write('\n\\end_layout\n\n\\end_inset\n</cell>\n')
 		i += 1
+# If row has fewer columns than num_cols
+	if num_cols_2 < num_cols:
+		while i < num_cols:
+			fout.write("""<cell alignment="left" valignment="top" usebox="none">
+\\begin_inset Text
+
+\\begin_layout Standard\n""")
+			fout.write(' ')
+			fout.write('\n\\end_layout\n\n\\end_inset\n</cell>\n')
+			i += 1
 	fout.write('</row>\n')
 	j += 1
 #####################
@@ -154,4 +194,5 @@ fout.write("""</lyxtabular>
 
 \\end_body
 \\end_document\n""")
+# close the LyX file
 fout.close()