aboutsummaryrefslogtreecommitdiffstats
path: root/Source/SPCSVParser.m
diff options
context:
space:
mode:
authorrowanbeentje <rowan@beent.je>2012-01-06 01:58:16 +0000
committerrowanbeentje <rowan@beent.je>2012-01-06 01:58:16 +0000
commitf50a951609470af0cfb080b3932da899a3880502 (patch)
tree5f16a6532133fbf499838101b8aa0f69e299f826 /Source/SPCSVParser.m
parentef58dcc476f4732f3b3baa32486c8be504bbd242 (diff)
downloadsequelpro-f50a951609470af0cfb080b3932da899a3880502.tar.gz
sequelpro-f50a951609470af0cfb080b3932da899a3880502.tar.bz2
sequelpro-f50a951609470af0cfb080b3932da899a3880502.zip
- Add a new CSV import field escape setting, as a new default, supporting both backslash and double-character escaping. This will mean the vast majority of CSVs selected for import will be read correctly by default, fixing issues such as Issue #1252.
Diffstat (limited to 'Source/SPCSVParser.m')
-rw-r--r--Source/SPCSVParser.m24
1 files changed, 22 insertions, 2 deletions
diff --git a/Source/SPCSVParser.m b/Source/SPCSVParser.m
index 5429896e..e012e323 100644
--- a/Source/SPCSVParser.m
+++ b/Source/SPCSVParser.m
@@ -85,6 +85,7 @@
NSUInteger startingParserPosition, nextQuoteDistance, nextFieldEndDistance, nextLineEndDistance;
NSInteger skipLength, j;
BOOL fieldIsQuoted, isEscaped;
+ BOOL nonStrictEscapeMatchingFallback = NO;
BOOL lineEndingEncountered = NO;
if (fieldCount == NSNotFound)
@@ -120,6 +121,7 @@
if (escapeLength && nextQuoteDistance != NSNotFound) {
j = 1;
isEscaped = NO;
+ nonStrictEscapeMatchingFallback = NO;
if (!escapeStringIsFieldQuoteString) {
while (j * escapeLength <= (NSInteger)nextQuoteDistance
&& ([[csvString substringWithRange:NSMakeRange((parserPosition + nextQuoteDistance - (j*escapeLength)), escapeLength)] isEqualToString:escapeString]))
@@ -128,7 +130,13 @@
j++;
}
skipLength = fieldQuoteLength;
- } else {
+ if (!useStrictEscapeMatching && !isEscaped) nonStrictEscapeMatchingFallback = YES;
+ }
+
+ // If the escape string is the field quote string, check for doubled (Excel-style) usage.
+ // Also, if the parser is not using strict escape matching, support field quote strings
+ // escaped by doubling the field quote string, as used by Excel
+ if (escapeStringIsFieldQuoteString || nonStrictEscapeMatchingFallback) {
if (parserPosition + nextQuoteDistance + (2 * fieldQuoteLength) <= csvStringLength
&& [[csvString substringWithRange:NSMakeRange(parserPosition + nextQuoteDistance + fieldQuoteLength, fieldQuoteLength)] isEqualToString:fieldQuoteString])
{
@@ -143,7 +151,7 @@
// Append the matched string, together with the field quote character
// which has been determined to be within the string - but append the
// field end character unescaped to avoid later processing.
- if (escapeStringIsFieldQuoteString) {
+ if (escapeStringIsFieldQuoteString || nonStrictEscapeMatchingFallback) {
[csvCellString appendString:[csvString substringWithRange:NSMakeRange(parserPosition, nextQuoteDistance+fieldQuoteLength)]];
} else {
[csvCellString appendString:[csvString substringWithRange:NSMakeRange(parserPosition, nextQuoteDistance - escapeLength)]];
@@ -461,6 +469,17 @@
if (nullString) nullReplacementString = [[NSString alloc] initWithString:nullString];
}
+/**
+ * Sets whether escaped field quote strings are matched strictly.
+ *
+ * By default, field quote strings aren't matched strictly - as well as the defined escape
+ * string, the class will automatically match doubled-up field quote strings, as exported
+ * by Excel and in common use (eg "field contains ""quotes"""). To switch escaping to strict
+ * mode, so that the doubled-up fallback is not applied, set this to YES.
+ *
+ * @param strictMatching YES to disable the doubled-up field quote fallback; NO (the
+ *                       default) to keep it enabled.
+ */
+- (void) setEscapeStringsAreMatchedStrictly:(BOOL)strictMatching
+{
+ useStrictEscapeMatching = strictMatching;
+}
+
#pragma mark -
#pragma mark Init and internal update methods
@@ -486,6 +505,7 @@
escapedLineEndString = [[NSString alloc] initWithString:@"\\\n"];
escapedFieldQuoteString = [[NSString alloc] initWithString:@"\\\""];
escapedEscapeString = [[NSString alloc] initWithString:@"\\\\"];
+ useStrictEscapeMatching = NO;
fieldEndLength = [fieldEndString length];
lineEndLength = [lineEndString length];
fieldQuoteLength = [fieldQuoteString length];