aboutsummaryrefslogtreecommitdiffstats
path: root/servo/lib/ucsv.py
diff options
context:
space:
mode:
authorFilipp Lepalaan <filipp@mac.com>2015-08-04 10:11:24 +0300
committerFilipp Lepalaan <filipp@mac.com>2015-08-04 10:11:24 +0300
commit63b0fc6269b38edf7234b9f151b80d81f614c0a3 (patch)
tree555de3068f33f8dddb4619349bbea7d9b7c822fd /servo/lib/ucsv.py
downloadServo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.gz
Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.bz2
Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.zip
Initial commit
First public commit
Diffstat (limited to 'servo/lib/ucsv.py')
-rw-r--r--servo/lib/ucsv.py49
1 files changed, 49 insertions, 0 deletions
diff --git a/servo/lib/ucsv.py b/servo/lib/ucsv.py
new file mode 100644
index 0000000..91b2acf
--- /dev/null
+++ b/servo/lib/ucsv.py
@@ -0,0 +1,49 @@
+"""
+Borrowed from
+http://stackoverflow.com/questions/1846135/python-csv-library-with-unicode-utf-8-support-that-just-works
+"""
+
+import csv
+import codecs
+
class UnicodeCsvReader(object):
    """Iterate over CSV rows, returning every cell as unicode text.

    A thin wrapper around ``csv.reader``: each row is read and split by the
    standard reader, then any byte-string cell is decoded with ``encoding``.

    Args:
        f: Object passed unchanged to ``csv.reader`` as its line source.
        encoding: Codec used to decode byte-string cells (default UTF-8).
        **kwargs: Extra options forwarded to ``csv.reader`` (for example
            ``dialect`` or ``delimiter``).
    """

    def __init__(self, f, encoding="utf-8", **kwargs):
        self.csv_reader = csv.reader(f, **kwargs)
        self.encoding = encoding

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next row as a list of text cells.

        Raises:
            StopIteration: When the underlying reader is exhausted.
        """
        # read and split the csv row into fields
        row = next(self.csv_reader)
        # now decode
        return [self._decode(cell) for cell in row]

    # Python 2 spells the iterator method ``next``; keep it so existing
    # callers that invoke ``reader.next()`` directly continue to work.
    next = __next__

    def _decode(self, cell):
        """Decode ``cell`` with ``self.encoding`` if it is a byte string."""
        # On Python 2 the csv module yields byte strings that need decoding;
        # on Python 3 it already yields text, which is returned untouched.
        if isinstance(cell, bytes):
            return cell.decode(self.encoding)
        return cell

    @property
    def line_num(self):
        """Number of source lines read so far by the wrapped reader."""
        return self.csv_reader.line_num
+
+
class UnicodeDictReader(csv.DictReader):
    """``csv.DictReader`` that reads its rows through ``UnicodeCsvReader``.

    Args:
        f: Object passed to both ``csv.DictReader`` and ``UnicodeCsvReader``
            as the line source.
        encoding: Codec handed to ``UnicodeCsvReader`` for decoding cells.
        fieldnames: Optional column names, as for ``csv.DictReader``.
        **kwargs: Remaining ``csv.DictReader`` options (``restkey``,
            ``restval``, ``dialect`` and any ``csv.reader`` formatting
            parameters).
    """

    def __init__(self, f, encoding="utf-8", fieldnames=None, **kwargs):
        csv.DictReader.__init__(self, f, fieldnames=fieldnames, **kwargs)
        # ``restkey`` and ``restval`` are DictReader-only options that the
        # base class has already consumed above. ``csv.reader`` rejects them
        # as unknown keywords, so they must not reach the inner reader.
        reader_kwargs = dict(
            (key, value) for key, value in kwargs.items()
            if key not in ("restkey", "restval")
        )
        # Replace the plain reader created by the base class with the
        # decoding one.
        self.reader = UnicodeCsvReader(f, encoding=encoding, **reader_kwargs)
+
+
def read_excel_file(f):
    """Sniff the CSV dialect of ``f`` and return a reader over its rows.

    Up to 1024 units are read through ``codecs.EncodedFile`` as a sample for
    ``csv.Sniffer``. The stream is then rewound to where it started so that
    the returned reader also yields the rows contained in the sample.

    Args:
        f: File object holding the CSV data. If it does not support
            ``tell()`` the rewind is skipped and the reader starts after
            the sample.

    Returns:
        A ``UnicodeCsvReader`` configured with the detected dialect.

    Raises:
        csv.Error: If the sniffer cannot determine the dialect.
    """
    try:
        start = f.tell()
    except (AttributeError, IOError, OSError):
        # Stream has no usable position; nothing to rewind to.
        start = None

    sample = codecs.EncodedFile(f, "utf-8").read(1024)

    # Reading the sample consumed the beginning of the stream. Without this
    # rewind the reader below would silently drop the first rows (and every
    # row of a file shorter than the sample).
    if start is not None:
        f.seek(start)

    dialect = csv.Sniffer().sniff(sample)
    return UnicodeCsvReader(codecs.EncodedFile(f, "utf-8"),
                            "utf-8", dialect=dialect)
+
def main():
    """Print the rows of the CSV file named by the first command-line argument.

    Each row is written to standard output with its cells joined by ", ".
    Exits with status 2 and a usage message on standard error when no file
    argument is given.
    """
    import sys

    if len(sys.argv) < 2:
        sys.stderr.write("usage: ucsv.py <csvfile>\n")
        sys.exit(2)

    with codecs.open(sys.argv[1], 'rUb') as csvfile:
        reader = read_excel_file(csvfile)
        for row in reader:
            # Call form of print: a single parenthesised expression behaves
            # the same as the Python 2 print statement and is valid syntax
            # on Python 3 as well.
            print(u', '.join(row))


if __name__ == '__main__':
    main()