// // $Id$ // // SPCSVParser.h // sequel-pro // // Created by Rowan Beentje on 16/09/2009. // Copyright 2009 Rowan Beentje. All rights reserved. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // More info at <http://code.google.com/p/sequel-pro/> /* * This class provides a string class intended for CSV parsing. Unlike SPSQLParser, this * does not extend NSMutableString and instead provides only a subset of similar methods. * Internally, an approach similar to NSScanner is used to support multi-character strings. * The methods are designed with the intention that as a string is parsed the parsed content * is removed. This also allows parsing to occur in "streaming" mode, with parseable content * being pulled off the start of the string as additional content is appended onto the end of * the string, eg from a file. * * Supports: * - Control of field terminator, line terminator, string enclosures and escape characters. * - Multi-character field terminator, line terminator, string enclosures, and escape strings. * - Stream-based processing (recommended that strings split by \n or \r are used when streaming * to minimise multibyte issues) * - Correct treatment of line terminators within quoted strings and proper escape support * including escape characters matching the quote characters in Excel style * * The internal usage of string range finding, similar to the NSScanner approach, means this * could be significantly sped up for single-character terminators. */ #define SPCSVPARSER_TRIM_ENACT_LENGTH 250000 @interface SPCSVParser : NSObject { NSMutableString *csvString; NSUInteger trimPosition; NSUInteger parserPosition; NSUInteger totalLengthParsed; NSUInteger csvStringLength; NSInteger fieldCount; NSString *nullReplacementString; NSString *fieldEndString; NSString *lineEndString; NSString *fieldQuoteString; NSString *escapeString; NSString *escapedFieldEndString; NSString *escapedLineEndString; NSString *escapedFieldQuoteString; NSString *escapedEscapeString; NSInteger fieldEndLength; NSInteger lineEndLength; NSInteger fieldQuoteLength; NSInteger escapeLength; NSCharacterSet *skipCharacterSet; NSScanner *csvScanner; BOOL escapeStringIsFieldQuoteString; } /* Retrieving data from the CSV string */ - (NSArray *) array; - (NSArray *) getRowAsArray; - (NSArray *) getRowAsArrayAndTrimString:(BOOL)trimString stringIsComplete:(BOOL)stringComplete; /* Adding new data to the string */ - (void) appendString:(NSString *)aString; - (void) setString:(NSString *)aString; /* Basic information */ - (NSUInteger) length; - (NSString *) string; - (NSUInteger) parserPosition; - (NSUInteger) totalLengthParsed; /* Setting the terminator, quote, escape and null character replacement strings */ - (void) setFieldTerminatorString:(NSString *)theString convertDisplayStrings:(BOOL)convertString; - (void) setLineTerminatorString:(NSString *)theString convertDisplayStrings:(BOOL)convertString; - (void) setFieldQuoteString:(NSString *)theString convertDisplayStrings:(BOOL)convertString; - (void) setEscapeString:(NSString *)theString convertDisplayStrings:(BOOL)convertString; - (void) setNullReplacementString:(NSString *)nullString; /* Init and internal update methods */ - (void) _initialiseCSVParserDefaults; - (void) _moveParserPastSkippableCharacters; - (NSUInteger) _getDistanceToString:(NSString *)theString; - (void) _updateState; - (NSString *) _convertDisplayString:(NSString *)theString; - (void) _updateSkipCharacterSet; /* Initialisation and teardown */ #pragma mark - - (id) init; - (id) initWithString:(NSString *)aString; - (id) initWithContentsOfFile:(NSString *)path encoding:(NSStringEncoding)enc error:(NSError **)error; - (void) dealloc; @end