diff options
author | Filipp Lepalaan <filipp@mac.com> | 2015-08-04 10:11:24 +0300 |
---|---|---|
committer | Filipp Lepalaan <filipp@mac.com> | 2015-08-04 10:11:24 +0300 |
commit | 63b0fc6269b38edf7234b9f151b80d81f614c0a3 (patch) | |
tree | 555de3068f33f8dddb4619349bbea7d9b7c822fd /servo/lib/ucsv.py | |
download | Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.gz Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.bz2 Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.zip |
Initial commit
First public commit
Diffstat (limited to 'servo/lib/ucsv.py')
-rw-r--r-- | servo/lib/ucsv.py | 49 |
1 files changed, 49 insertions, 0 deletions
"""
Unicode-aware wrappers around the standard library ``csv`` readers.

Borrowed from
http://stackoverflow.com/questions/1846135/python-csv-library-with-unicode-utf-8-support-that-just-works
"""

import csv
import codecs

# True on Python 2, where ``str`` is the byte-string type and the csv module
# consumes and produces byte strings. On Python 3 the csv module works on
# text, so the input has to be decoded *before* it reaches ``csv.reader``.
_CSV_WANTS_BYTES = str is bytes

# Keyword arguments that ``csv.DictReader`` consumes itself; ``csv.reader``
# rejects them, so they must not be forwarded to the wrapped reader.
_DICT_READER_ONLY = ("restkey", "restval")


class UnicodeCsvReader(object):
    """Iterate over CSV rows, returning every cell as a text string.

    Wraps ``csv.reader(f, **kwargs)``. Cells that come back from the csv
    module as byte strings are decoded with ``encoding``; cells that are
    not byte strings are passed through unchanged.
    """

    def __init__(self, f, encoding="utf-8", **kwargs):
        """Create the reader.

        :param f: any iterable of lines accepted by ``csv.reader``
        :param encoding: codec used to decode byte-string cells
        :param kwargs: forwarded verbatim to ``csv.reader``
        """
        self.csv_reader = csv.reader(f, **kwargs)
        self.encoding = encoding

    def __iter__(self):
        return self

    def next(self):
        """Return the next row as a list of text strings.

        Raises ``StopIteration`` when the underlying reader is exhausted.
        """
        # read and split the csv row into fields
        row = next(self.csv_reader)
        encoding = self.encoding
        # now decode; only byte strings need (and support) decoding
        return [
            cell.decode(encoding) if isinstance(cell, bytes) else cell
            for cell in row
        ]

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    @property
    def line_num(self):
        """Number of source lines read so far by the wrapped reader."""
        return self.csv_reader.line_num


class UnicodeDictReader(csv.DictReader):
    """``csv.DictReader`` whose rows are read through UnicodeCsvReader."""

    def __init__(self, f, encoding="utf-8", fieldnames=None, **kwargs):
        """Create the reader.

        :param f: any iterable of lines accepted by ``csv.reader``
        :param encoding: codec used to decode byte-string cells
        :param fieldnames: optional explicit column names
        :param kwargs: forwarded to ``csv.DictReader``; everything except
            ``restkey``/``restval`` is also forwarded to the wrapped
            UnicodeCsvReader
        """
        csv.DictReader.__init__(self, f, fieldnames=fieldnames, **kwargs)
        # restkey/restval belong to DictReader only; csv.reader raises
        # TypeError when it receives them.
        reader_kwargs = dict(
            (key, value) for key, value in kwargs.items()
            if key not in _DICT_READER_ONLY
        )
        self.reader = UnicodeCsvReader(f, encoding=encoding, **reader_kwargs)


def _utf8_stream(f):
    """Wrap binary stream ``f`` so it yields what the csv module expects.

    Python 2: UTF-8 byte strings (via ``codecs.EncodedFile``).
    Python 3: decoded text (via the UTF-8 ``codecs`` stream reader).
    """
    if _CSV_WANTS_BYTES:
        return codecs.EncodedFile(f, "utf-8")
    return codecs.getreader("utf-8")(f)


def read_excel_file(f):
    """Return a UnicodeCsvReader over ``f`` using a sniffed CSV dialect.

    The dialect is guessed by ``csv.Sniffer`` from roughly the first 1024
    bytes of the stream. The stream is then moved back to where it was
    when this function was called, so the returned reader starts at the
    first row instead of after the sample.

    :param f: UTF-8 encoded binary stream supporting ``read``, ``tell``
        and ``seek``
    :raises csv.Error: if the sniffer cannot determine the dialect
    """
    start = f.tell()
    sample = _utf8_stream(f).read(1024)
    # Rewind before sniffing so the stream is restored even if sniff() fails.
    f.seek(start)
    dialect = csv.Sniffer().sniff(sample)
    return UnicodeCsvReader(_utf8_stream(f), "utf-8", dialect=dialect)


def main():
    """Print every row of the CSV file named by the first CLI argument."""
    import sys
    if len(sys.argv) < 2:
        raise SystemExit("usage: ucsv.py CSVFILE")
    # Python 2 keeps the original universal-newline mode; Python 3 does not
    # accept the 'U' flag, so plain binary mode is used there.
    mode = "rUb" if _CSV_WANTS_BYTES else "rb"
    with open(sys.argv[1], mode) as csvfile:
        for row in read_excel_file(csvfile):
            print(u', '.join(row))


if __name__ == '__main__':
    main()