aboutsummaryrefslogtreecommitdiffstats
path: root/Source/SPCSVParser.h
diff options
context:
space:
mode:
authorrowanbeentje <rowan@beent.je>2009-09-28 00:43:41 +0000
committerrowanbeentje <rowan@beent.je>2009-09-28 00:43:41 +0000
commitcc0c0a7842e3bff325fa29c71f5115361981797d (patch)
tree8c101a961ba86f1f04ddbba00ce76963d9c4f3e3 /Source/SPCSVParser.h
parent2183eeefefb81846c2cc2c6b4bf68b12167f2b24 (diff)
downloadsequelpro-cc0c0a7842e3bff325fa29c71f5115361981797d.tar.gz
sequelpro-cc0c0a7842e3bff325fa29c71f5115361981797d.tar.bz2
sequelpro-cc0c0a7842e3bff325fa29c71f5115361981797d.zip
Rewrite CSV import:
- Replace the CSV parsing function (arrayForCSV:) with a new SPCSVParser class - Make speed improvements to SPCSVParser to achieve 1.9x faster parsing than the old arrayForCSV: function - Rewrite CSV imports to be performed as a streaming import, keeping memory usage much much lower - CSV field mapping preview is now shown very early on in the import process, as soon as the first hundred rows are available for a preview - Progress bars are more consistent and accurate - CSV rows are grouped into batches of up to 50 (depending on line length) for import, falling back to one-query-per-row if errors occur. The current error reporting level is therefore maintained, but imports of non-erroring data are much much faster. - Improve processing speed slightly - Fix some odd edge cases in CSV parsing This addresses issue #389.
Diffstat (limited to 'Source/SPCSVParser.h')
-rw-r--r--Source/SPCSVParser.h117
1 files changed, 117 insertions, 0 deletions
diff --git a/Source/SPCSVParser.h b/Source/SPCSVParser.h
new file mode 100644
index 00000000..0f1a8a20
--- /dev/null
+++ b/Source/SPCSVParser.h
@@ -0,0 +1,117 @@
+//
+// $Id$
+//
+// SPCSVParser.h
+// sequel-pro
+//
+// Created by Rowan Beentje on 16/09/2009.
+// Copyright 2009 Rowan Beentje. All rights reserved.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// More info at <http://code.google.com/p/sequel-pro/>
+
+#import <Cocoa/Cocoa.h>
+
+/*
+ * This class provides a string class intended for CSV parsing. Unlike SPSQLParser, this
+ * does not extend NSMutableString and instead provides only a subset of similar methods.
+ * Internally, an approach similar to NSScanner is used to support multi-character strings.
+ * The methods are designed with the intention that as a string is parsed the parsed content
+ * is removed. This also allows parsing to occur in "streaming" mode, with parseable content
+ * being pulled off the start of the string as additional content is appended onto the end of
+ * the string, eg from a file.
+ *
+ * Supports:
+ * - Control of field terminator, line terminator, string enclosures and escape characters.
+ * - Multi-character field terminator, line terminator, string enclosures, and escape strings.
+ * - Stream-based processing (recommended that strings split by \n or \r are used when streaming
+ * to minimise multibyte issues)
+ * - Correct treatment of line terminators within quoted strings and proper escape support
+ * including escape characters matching the quote characters in Excel style
+ *
+ * The internal usage of string range finding, similar to the NSScanner approach, means this
+ * could be significantly sped up for single-character terminators.
+ */
+
+#define SPCSVPARSER_TRIM_ENACT_LENGTH 250000 // NOTE(review): presumably the amount of already-parsed text that must accumulate before it is trimmed from the buffer; usage is not visible in this header - confirm in SPCSVParser.m.
+
+@interface SPCSVParser : NSObject // CSV parser that owns a mutable text buffer and configurable terminator, quote and escape strings.
+{
+ NSMutableString *csvString; // Mutable buffer of CSV text; presumably what -appendString: and -setString: modify (confirm in the .m).
+
+ long trimPosition; // NOTE(review): presumably the offset up to which csvString may be trimmed - confirm.
+ long parserPosition; // Presumably the current parse offset within csvString, as returned by -parserPosition.
+ long totalLengthParsed; // Presumably a running total of parsed length, as returned by -totalLengthParsed - confirm whether it includes trimmed content.
+ long csvStringLength; // Presumably a cached length of csvString - confirm.
+ int fieldCount; // NOTE(review): meaning not visible in this header; presumably the number of fields per row.
+
+ NSString *nullReplacementString; // Presumably configured by -setNullReplacementString:; exact NULL handling lives in the .m.
+ NSString *fieldEndString; // Field terminator; presumably configured by -setFieldTerminatorString:convertDisplayStrings:.
+ NSString *lineEndString; // Line terminator; presumably configured by -setLineTerminatorString:convertDisplayStrings:.
+ NSString *fieldQuoteString; // String enclosure (quote); presumably configured by -setFieldQuoteString:convertDisplayStrings:.
+ NSString *escapeString; // Escape string; presumably configured by -setEscapeString:convertDisplayStrings:.
+ NSString *escapedFieldEndString; // NOTE(review): presumably escapeString followed by fieldEndString, precomputed - confirm.
+ NSString *escapedLineEndString; // NOTE(review): presumably escapeString followed by lineEndString - confirm.
+ NSString *escapedFieldQuoteString; // NOTE(review): presumably escapeString followed by fieldQuoteString - confirm.
+ NSString *escapedEscapeString; // NOTE(review): presumably escapeString repeated twice - confirm.
+ int fieldEndLength; // Presumably the cached length of fieldEndString.
+ int lineEndLength; // Presumably the cached length of lineEndString.
+ int fieldQuoteLength; // Presumably the cached length of fieldQuoteString.
+ int escapeLength; // Presumably the cached length of escapeString.
+ NSCharacterSet *skipCharacterSet; // Presumably rebuilt by -_updateSkipCharacterSet and consumed by -_moveParserPastSkippableCharacters - confirm.
+ NSScanner *csvScanner; // Scanner instance; how it is used against csvString is not visible in this header.
+
+ BOOL escapeStringIsFieldQuoteString; // Presumably YES when the escape and quote strings are identical (the "Excel style" case in the class comment) - confirm.
+}
+
+/* Retrieving data from the CSV string */
+- (NSArray *) array; // Presumably parses the whole buffer and returns every row - confirm.
+- (NSArray *) getRowAsArray; // Presumably returns the fields of the next row; the result once no row is available is not visible here.
+- (NSArray *) getRowAsArrayAndTrimString:(BOOL)trimString stringIsComplete:(BOOL)stringComplete; // NOTE(review): flags presumably control buffer trimming and whether a trailing partial row is treated as final - confirm.
+
+/* Adding new data to the string */
+- (void) appendString:(NSString *)aString; // Presumably adds aString to the end of the buffer, enabling the streaming use described in the class comment.
+- (void) setString:(NSString *)aString; // Presumably replaces the buffer contents with aString - confirm whether positions are reset.
+
+/* Basic information */
+- (NSUInteger) length; // Presumably the length of the current buffer - confirm.
+- (NSString *) string; // Presumably the current buffer contents - confirm whether a copy is returned.
+- (long) parserPosition; // Presumably returns the parserPosition ivar.
+- (long) totalLengthParsed; // Presumably returns the totalLengthParsed ivar.
+
+/* Setting the terminator, quote, escape and null character replacement strings. NOTE(review): convertString presumably passes theString through -_convertDisplayString: first - confirm. */
+- (void) setFieldTerminatorString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
+- (void) setLineTerminatorString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
+- (void) setFieldQuoteString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
+- (void) setEscapeString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
+- (void) setNullReplacementString:(NSString *)nullString;
+
+/* Init and internal update methods. These underscore-prefixed methods are described as internal although they are declared in this public header. */
+- (void) _initialiseCSVParserDefaults; // Presumably applies default parser settings; the defaults themselves are defined in the .m.
+- (void) _moveParserPastSkippableCharacters; // Presumably advances the parse position past characters in skipCharacterSet - confirm.
+- (long) _getDistanceToString:(NSString *)theString; // NOTE(review): presumably the distance from the parse position to the next occurrence of theString; the not-found return value is not visible here.
+- (void) _updateState; // NOTE(review): purpose not visible in this header; presumably refreshes derived ivars after a settings or buffer change.
+- (NSString *) _convertDisplayString:(NSString *)theString; // Presumably turns display forms (e.g. a typed "\t") into the characters they denote - confirm the supported set.
+- (void) _updateSkipCharacterSet; // Presumably rebuilds skipCharacterSet from the current settings - confirm.
+
+/* Initialisation and teardown */
+#pragma mark -
+- (id) init; // Creates a parser; presumably with an empty buffer - confirm.
+- (id) initWithString:(NSString *)aString; // Creates a parser; presumably with aString as the initial buffer.
+- (id) initWithContentsOfFile:(NSString *)path encoding:(NSStringEncoding)enc error:(NSError **)error; // Presumably loads the file at path using enc and reports failure through error - confirm the return value on failure.
+- (void) dealloc; // Explicit -dealloc declaration; presumably releases the ivars above under manual reference counting.
+
+@end