//
//  $Id$
//
//  SPCSVParser.h
//  sequel-pro
//
//  Created by Rowan Beentje on September 16, 2009.
//  Copyright (c) 2009 Rowan Beentje. All rights reserved.
//
//  Permission is hereby granted, free of charge, to any person
//  obtaining a copy of this software and associated documentation
//  files (the "Software"), to deal in the Software without
//  restriction, including without limitation the rights to use,
//  copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the
//  Software is furnished to do so, subject to the following
//  conditions:
//
//  The above copyright notice and this permission notice shall be
//  included in all copies or substantial portions of the Software.
//
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
//  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
//  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
//  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
//  OTHER DEALINGS IN THE SOFTWARE.
//
//  More info at <http://code.google.com/p/sequel-pro/>

/**
 * This class provides a string class intended for CSV parsing.  Unlike SPSQLParser, this
 * does not extend NSMutableString and instead provides only a subset of similar methods.
 * Internally, an approach similar to NSScanner is used to support multi-character strings.
 * The methods are designed with the intention that as a string is parsed the parsed content
 * is removed.  This also allows parsing to occur in "streaming" mode, with parseable content
 * being pulled off the start of the string as additional content is appended onto the end of
 * the string, e.g. from a file.
 *
 * Supports:
 *  - Control of field terminator, line terminator, string enclosures and escape characters.
 *  - Multi-character field terminator, line terminator, string enclosures, and escape strings.
 *  - Stream-based processing (recommended that strings split by \n or \r are used when streaming
 *    to minimise multibyte issues)
 *  - Correct treatment of line terminators within quoted strings and proper escape support
 *    including escape characters matching the quote characters in Excel style
 *
 * The internal usage of string range finding, similar to the NSScanner approach, means this
 * could be significantly sped up for single-character terminators.
 */

/*
 * NOTE(review): presumably the amount of already-parsed content (in characters) that must
 * accumulate before the internal buffer is actually trimmed - confirm against the
 * implementation file, which is where this constant is consumed.
 */
#define SPCSVPARSER_TRIM_ENACT_LENGTH 250000

@interface SPCSVParser : NSObject
{
	/* The mutable buffer holding the CSV text being parsed (see the class notes above) */
	NSMutableString *csvString;

	/*
	 * Position and length bookkeeping.  parserPosition and totalLengthParsed share their
	 * names with the accessors declared below; the exact semantics of each counter are
	 * defined in the implementation - TODO confirm there before relying on them.
	 */
	NSUInteger trimPosition;
	NSUInteger parserPosition;
	NSUInteger totalLengthParsed;
	NSUInteger csvStringLength;

	/* NOTE(review): presumably the number of fields expected/seen per row - confirm */
	NSInteger fieldCount;

	/* NOTE(review): presumably configured via -setNullReplacementString: - confirm */
	NSString *nullReplacementString;

	/*
	 * The delimiter strings.  NOTE(review): presumably configured through the
	 * -set...String:convertDisplayStrings: methods declared below - confirm.
	 */
	NSString *fieldEndString;
	NSString *lineEndString;
	NSString *fieldQuoteString;
	NSString *escapeString;

	/* NOTE(review): presumably the escaped forms of the four delimiter strings above - confirm */
	NSString *escapedFieldEndString;
	NSString *escapedLineEndString;
	NSString *escapedFieldQuoteString;
	NSString *escapedEscapeString;

	/* NOTE(review): presumably cached lengths of the four delimiter strings - confirm */
	NSInteger fieldEndLength;
	NSInteger lineEndLength;
	NSInteger fieldQuoteLength;
	NSInteger escapeLength;

	/*
	 * Scanning helpers.  NOTE(review): skipCharacterSet is presumably maintained by
	 * -_updateSkipCharacterSet and consumed by -_moveParserPastSkippableCharacters - confirm.
	 */
	NSCharacterSet *skipCharacterSet;
	NSScanner *csvScanner;

	/*
	 * Behaviour flags.  NOTE(review): escapeStringIsFieldQuoteString presumably records the
	 * "Excel style" case mentioned in the class notes, where the escape string matches the
	 * quote string; useStrictEscapeMatching is presumably set by
	 * -setEscapeStringsAreMatchedStrictly: - confirm both.
	 */
	BOOL escapeStringIsFieldQuoteString;
	BOOL useStrictEscapeMatching;
}

/* Retrieving data from the CSV string */

/**
 * NOTE(review): presumably parses the whole of the current string and returns all rows -
 * confirm the element type and the effect on the internal buffer in the implementation.
 */
- (NSArray *) array;

/**
 * NOTE(review): presumably returns the next row from the current parser position; the
 * return value when no complete row is available is not visible from this header - confirm.
 */
- (NSArray *) getRowAsArray;

/**
 * Variant of row retrieval taking two explicit flags.
 *
 * @param trimString     NOTE(review): presumably whether parsed content is removed from the
 *                       start of the string as described in the class notes - confirm.
 * @param stringComplete NOTE(review): presumably whether the string holds all remaining data
 *                       (as opposed to more being appended in streaming mode) - confirm.
 * @return An array for the row; contents and failure value are defined by the implementation.
 */
- (NSArray *) getRowAsArrayAndTrimString:(BOOL)trimString stringIsComplete:(BOOL)stringComplete;

/* Adding new data to the string */

/**
 * Supplies additional content for the parser; the class notes describe content being
 * appended onto the end of the string while parsed content is pulled off the start.
 */
- (void) appendString:(NSString *)aString;

/**
 * NOTE(review): presumably replaces the current string content with aString; whether the
 * parser state is also reset is not visible from this header - confirm.
 */
- (void) setString:(NSString *)aString;

/* Basic information */

/** NOTE(review): presumably the length of the current string - confirm how trimming affects it. */
- (NSUInteger) length;

/** NOTE(review): presumably the current string content - confirm whether a copy is returned. */
- (NSString *) string;

/** Accessor sharing its name with the parserPosition instance variable. */
- (NSUInteger) parserPosition;

/** Accessor sharing its name with the totalLengthParsed instance variable. */
- (NSUInteger) totalLengthParsed;

/* Setting the terminator, quote, escape and null character replacement strings */

/*
 * For the four methods below, NOTE(review): convertString presumably requests that theString
 * is first passed through -_convertDisplayString: - confirm which display sequences are
 * converted in the implementation.
 */
- (void) setFieldTerminatorString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
- (void) setLineTerminatorString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
- (void) setFieldQuoteString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;
- (void) setEscapeString:(NSString *)theString convertDisplayStrings:(BOOL)convertString;

/** NOTE(review): how and when nullString is substituted is defined by the implementation - confirm. */
- (void) setNullReplacementString:(NSString *)nullString;

/** NOTE(review): presumably toggles the useStrictEscapeMatching flag - confirm its effect on parsing. */
- (void) setEscapeStringsAreMatchedStrictly:(BOOL)strictMatching;

/* Init and internal update methods */

/*
 * The underscore-prefixed methods below are internal helpers that happen to be declared in
 * this header; their behaviour is defined entirely by the implementation and is not
 * documented here because it cannot be established from the declarations alone.
 */
- (void) _initialiseCSVParserDefaults;
- (void) _moveParserPastSkippableCharacters;
- (NSUInteger) _getDistanceToString:(NSString *)theString;
- (void) _updateState;
- (NSString *) _convertDisplayString:(NSString *)theString;
- (void) _updateSkipCharacterSet;

/* Initialisation and teardown */
#pragma mark -

/** Creates a parser without any initial content supplied by the caller. */
- (id) init;

/** Creates a parser given an initial string. */
- (id) initWithString:(NSString *)aString;

/**
 * Creates a parser from a file.
 *
 * @param path  Path of the file to read.
 * @param enc   String encoding to use for the file contents.
 * @param error Out-parameter for an error; NOTE(review): presumably forwarded to the
 *              underlying string-loading call - confirm the failure return value.
 */
- (id) initWithContentsOfFile:(NSString *)path encoding:(NSStringEncoding)enc error:(NSError **)error;

- (void) dealloc;

@end