diff options
author | rowanbeentje <rowan@beent.je> | 2012-01-06 01:58:16 +0000 |
---|---|---|
committer | rowanbeentje <rowan@beent.je> | 2012-01-06 01:58:16 +0000 |
commit | f50a951609470af0cfb080b3932da899a3880502 (patch) | |
tree | 5f16a6532133fbf499838101b8aa0f69e299f826 /Source/SPCSVParser.m | |
parent | ef58dcc476f4732f3b3baa32486c8be504bbd242 (diff) | |
download | sequelpro-f50a951609470af0cfb080b3932da899a3880502.tar.gz sequelpro-f50a951609470af0cfb080b3932da899a3880502.tar.bz2 sequelpro-f50a951609470af0cfb080b3932da899a3880502.zip |
- Add a new CSV import field escape setting, as a new default, supporting both backslash and double-character escaping. This will mean the vast majority of CSVs selected for import will be read correctly by default, fixing issues such as Issue #1252.
Diffstat (limited to 'Source/SPCSVParser.m')
-rw-r--r-- | Source/SPCSVParser.m | 24 |
1 files changed, 22 insertions, 2 deletions
diff --git a/Source/SPCSVParser.m b/Source/SPCSVParser.m index 5429896e..e012e323 100644 --- a/Source/SPCSVParser.m +++ b/Source/SPCSVParser.m @@ -85,6 +85,7 @@ NSUInteger startingParserPosition, nextQuoteDistance, nextFieldEndDistance, nextLineEndDistance; NSInteger skipLength, j; BOOL fieldIsQuoted, isEscaped; + BOOL nonStrictEscapeMatchingFallback = NO; BOOL lineEndingEncountered = NO; if (fieldCount == NSNotFound) @@ -120,6 +121,7 @@ if (escapeLength && nextQuoteDistance != NSNotFound) { j = 1; isEscaped = NO; + nonStrictEscapeMatchingFallback = NO; if (!escapeStringIsFieldQuoteString) { while (j * escapeLength <= (NSInteger)nextQuoteDistance && ([[csvString substringWithRange:NSMakeRange((parserPosition + nextQuoteDistance - (j*escapeLength)), escapeLength)] isEqualToString:escapeString])) @@ -128,7 +130,13 @@ j++; } skipLength = fieldQuoteLength; - } else { + if (!useStrictEscapeMatching && !isEscaped) nonStrictEscapeMatchingFallback = YES; + } + + // If the escape string is the field quote string, check for doubled (Excel-style) usage. + // Also, if the parser is in loose mode, also support field end strings quoted by using + // another field end string, as used by Excel + if (escapeStringIsFieldQuoteString || nonStrictEscapeMatchingFallback) { if (parserPosition + nextQuoteDistance + (2 * fieldQuoteLength) <= csvStringLength && [[csvString substringWithRange:NSMakeRange(parserPosition + nextQuoteDistance + fieldQuoteLength, fieldQuoteLength)] isEqualToString:fieldQuoteString]) { @@ -143,7 +151,7 @@ // Append the matched string, together with the field quote character // which has been determined to be within the string - but append the // field end character unescaped to avoid later processing. 
- if (escapeStringIsFieldQuoteString) { + if (escapeStringIsFieldQuoteString || nonStrictEscapeMatchingFallback) { [csvCellString appendString:[csvString substringWithRange:NSMakeRange(parserPosition, nextQuoteDistance+fieldQuoteLength)]]; } else { [csvCellString appendString:[csvString substringWithRange:NSMakeRange(parserPosition, nextQuoteDistance - escapeLength)]]; @@ -461,6 +469,17 @@ if (nullString) nullReplacementString = [[NSString alloc] initWithString:nullString]; } +/** + * By default, field end strings aren't matched strictly - as well as the defined escape + * character, the class will automatically match doubled-up field quote strings, as exported + * by Excel and in common use (eg "field contains ""quotes"""). To switch escaping to strict + * mode, set this to YES. + */ +- (void) setEscapeStringsAreMatchedStrictly:(BOOL)strictMatching +{ + useStrictEscapeMatching = strictMatching; +} + #pragma mark - #pragma mark Init and internal update methods @@ -486,6 +505,7 @@ escapedLineEndString = [[NSString alloc] initWithString:@"\\\n"]; escapedFieldQuoteString = [[NSString alloc] initWithString:@"\\\""]; escapedEscapeString = [[NSString alloc] initWithString:@"\\\\"]; + useStrictEscapeMatching = NO; fieldEndLength = [fieldEndString length]; lineEndLength = [lineEndString length]; fieldQuoteLength = [fieldQuoteString length]; |