# servo/lib/ucsv.py, as added by commit 63b0fc6269b38edf7234b9f151b80d81f614c0a3
# ("Initial commit", Filipp Lepalaan, 2015-08-04).
"""
Unicode-aware wrappers around the standard ``csv`` readers.

Borrowed from
http://stackoverflow.com/questions/1846135/python-csv-library-with-unicode-utf-8-support-that-just-works
"""

import csv
import codecs

try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3


def _to_native(value, encoding):
    """Return *value* as the native ``str`` type that ``csv`` parses.

    That is a byte string on Python 2 and a text string on Python 3.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, _text_type):
        # Only reachable on Python 2: unicode -> encoded byte string.
        return value.encode(encoding)
    # Python 3 bytes -> text.
    return value.decode(encoding)


def _to_text(value, encoding):
    """Return *value* as text, decoding byte strings with *encoding*."""
    if isinstance(value, _text_type):
        return value
    return value.decode(encoding)


class UnicodeCsvReader(object):
    """Iterate over CSV rows, yielding every cell as a text string.

    ``f`` is any iterable of lines (byte strings or text), ``encoding`` is
    used to convert between bytes and text, and the remaining keyword
    arguments are handed to ``csv.reader`` unchanged.
    """

    def __init__(self, f, encoding="utf-8", **kwargs):
        self.encoding = encoding
        # csv only understands the native str type, so normalise each line
        # lazily as the underlying reader pulls it.
        native_lines = (_to_native(line, encoding) for line in f)
        self.csv_reader = csv.reader(native_lines, **kwargs)

    def __iter__(self):
        return self

    def __next__(self):
        # read and split the csv row into fields
        row = next(self.csv_reader)
        # now decode
        return [_to_text(cell, self.encoding) for cell in row]

    # Python 2 spells the iterator protocol method ``next``.
    next = __next__

    @property
    def line_num(self):
        """Number of source lines read so far by the wrapped reader."""
        return self.csv_reader.line_num


class UnicodeDictReader(csv.DictReader):
    """``csv.DictReader`` whose keys and values are text strings."""

    def __init__(self, f, encoding="utf-8", fieldnames=None, **kwargs):
        csv.DictReader.__init__(self, f, fieldnames=fieldnames, **kwargs)
        # restkey/restval are DictReader options; csv.reader rejects them
        # with a TypeError, so they must not reach the inner reader.
        reader_kwargs = dict(kwargs)
        reader_kwargs.pop("restkey", None)
        reader_kwargs.pop("restval", None)
        self.reader = UnicodeCsvReader(f, encoding=encoding, **reader_kwargs)


def read_excel_file(f):
    """Return a ``UnicodeCsvReader`` for *f* using a sniffed dialect.

    ``f`` must be a seekable file object opened in binary mode and holding
    UTF-8 data.  Up to 1024 bytes are read to detect the dialect, after
    which the stream is rewound to where it was, so the returned reader
    yields every row rather than skipping the sniffed sample.

    Raises ``csv.Error`` when the dialect cannot be determined, and
    whatever ``f.tell()``/``f.seek()`` raise for a non-seekable stream.
    """
    start = f.tell()
    sample = codecs.EncodedFile(f, "utf-8").read(1024)
    # Undo the read above; otherwise the first rows are silently lost.
    f.seek(start)
    dialect = csv.Sniffer().sniff(_to_native(sample, "utf-8"))
    return UnicodeCsvReader(codecs.EncodedFile(f, "utf-8"),
                            "utf-8", dialect=dialect)


def main():
    """Print the rows of the CSV file named by the first CLI argument."""
    import sys
    # The 'U' flag is rejected by Python 3.11+, so keep it for Python 2 only.
    mode = 'rUb' if sys.version_info[0] < 3 else 'rb'
    with open(sys.argv[1], mode) as csvfile:
        reader = read_excel_file(csvfile)
        for row in reader:
            print(u', '.join(row))


if __name__ == '__main__':
    main()