diff options
-rw-r--r-- | Source/TableDump.m | 360 |
1 files changed, 244 insertions, 116 deletions
diff --git a/Source/TableDump.m b/Source/TableDump.m index 321ae9f2..ccdda6ee 100644 --- a/Source/TableDump.m +++ b/Source/TableDump.m @@ -34,6 +34,7 @@ #import "SPTableData.h" #import "SPStringAdditions.h" #import "SPArrayAdditions.h" +#import "RegexKitLite.h" @implementation TableDump @@ -578,12 +579,14 @@ nil, nil, nil, NSLocalizedString(@"Could not parse file as CSV", @"Error when we can't parse/split file as CSV") ); + [importArray release], importArray = nil; [pool release]; return; } if (progressCancelled) { progressCancelled = NO; + [importArray release], importArray = nil; [pool release]; return; } @@ -608,6 +611,21 @@ //set up tableView currentRow = 0; + + // Sanity check the first row of the CSV to prevent hang loops caused by wrong line ending entry + if ([[importArray objectAtIndex:currentRow] count] > 512) { + NSBeginAlertSheet(NSLocalizedString(@"Error", @"Error"), + NSLocalizedString(@"OK", @"OK button"), + nil, nil, + tableWindow, self, + nil, nil, nil, + NSLocalizedString(@"The CSV was read as containing more than 512 columns, more than the maximum columns permitted for speed reasons by Sequel Pro.\n\nThis usually happens due to errors reading the CSV; please double-check the CSV to be imported and the line endings and escape characters at the bottom of the CSV selection dialog.", @"Error when CSV appears to have too many columns to import, probably due to line ending mismatch") + ); + [importArray release], importArray = nil; + [pool release]; + return; + } + if (fieldMappingArray) [fieldMappingArray release], fieldMappingArray = nil; [self setupFieldMappingArray]; [rowDownButton setEnabled:NO]; @@ -1504,145 +1522,230 @@ } -- (NSArray *)arrayForCSV:(NSString *)csv terminatedBy:(NSString *)terminated - enclosedBy:(NSString *)enclosed escapedBy:(NSString *)escaped lineEnds:(NSString *)lineEnds +- (NSArray *)arrayForCSV:(NSString *)csv terminatedBy:(NSString *)fieldEndString + enclosedBy:(NSString *)fieldQuoteString escapedBy:(NSString *)escapeString lineEnds:(NSString *)lineEndString /* loads a csv string into an array */ { - NSMutableString *tempTerminated, *tempLineEnds; + NSMutableString *tempInputString = [NSMutableString string]; NSMutableArray *tempArray = [NSMutableArray array]; NSMutableArray *tempRowArray = [NSMutableArray array]; - NSMutableString *mutableField; + NSMutableString *mutableField = [NSMutableString string]; NSScanner *scanner; - NSString *scanString; - NSMutableString *tempString = [NSMutableString string]; - NSMutableArray *linesArray = [NSMutableArray array]; - BOOL isEscaped, br; - int fieldCount = 0; - int x,i,j; - - //repare tabs and line ends - tempTerminated = [NSMutableString stringWithString:terminated]; - [tempTerminated replaceOccurrencesOfString:@"\\t" withString:@"\t" - options:NSLiteralSearch - range:NSMakeRange(0, [tempTerminated length])]; - [tempTerminated replaceOccurrencesOfString:@"\\n" withString:@"\n" - options:NSLiteralSearch - range:NSMakeRange(0, [tempTerminated length])]; - [tempTerminated replaceOccurrencesOfString:@"\\r" withString:@"\r" - options:NSLiteralSearch - range:NSMakeRange(0, [tempTerminated length])]; - terminated = [NSString stringWithString:tempTerminated]; - tempLineEnds = [NSMutableString stringWithString:lineEnds]; - [tempLineEnds replaceOccurrencesOfString:@"\\t" withString:@"\t" - options:NSLiteralSearch - range:NSMakeRange(0, [tempLineEnds length])]; - [tempLineEnds replaceOccurrencesOfString:@"\\n" withString:@"\n" - options:NSLiteralSearch - range:NSMakeRange(0, [tempLineEnds length])]; - [tempLineEnds replaceOccurrencesOfString:@"\\r" withString:@"\r" - options:NSLiteralSearch - range:NSMakeRange(0, [tempLineEnds length])]; - lineEnds = [NSString stringWithString:tempLineEnds]; - - //array with one line per object - scanner = [NSScanner scannerWithString:csv]; - [scanner setCharactersToBeSkipped:nil]; - - while ( ![scanner isAtEnd] && !progressCancelled) { - [tempString setString:@""]; - br = NO; - - while ( !br ) { - scanString = @""; - [scanner scanUpToString:lineEnds intoString:&scanString]; - [tempString appendString:scanString]; - [scanner scanString:lineEnds intoString:&scanString]; - - //test if lineEnds-character is escaped - isEscaped = NO; - j = 1; - if ( ![escaped isEqualToString:enclosed] && ![escaped isEqualToString:@""] ) { - while ( ((j*[escaped length])<=[tempString length]) && - ([[tempString substringWithRange:NSMakeRange(([tempString length]-(j*[escaped length])),[escaped length])] isEqualToString:escaped]) ) { - isEscaped = !isEscaped; - j++; - } - } - if ( !isEscaped || [scanner isAtEnd] ) { - //end of row - br = YES; - } else { - //lineEnds-character was escaped - [tempString appendString:scanString]; - } + NSString *scanString, *stringToLineEnd, *stringToFieldEnd; + NSString *escapedFieldEndString, *escapedFieldQuoteString, *escapedEscapeString, *escapedLineEndString; + NSString *nullString = [prefs objectForKey:@"NullValue"]; + NSCharacterSet *whitespaceSet = nil; + BOOL isEscaped, escapeStringIsFieldQuoteString, processingLine, processingField, fieldWasQuoted; + int fieldCount = NSNotFound; + int i,j,csvLength,fieldEndLength,fieldQuoteLength,escapeLength,lineEndLength,skipLength; + + // Fix tabs and line endings in the inputs + for (i = 0; i < 4; i++) { + switch (i) { + case 0: [tempInputString setString:fieldEndString]; break; + case 1: [tempInputString setString:fieldQuoteString]; break; + case 2: [tempInputString setString:escapeString]; break; + case 3: [tempInputString setString:lineEndString]; break; + } + [tempInputString replaceOccurrencesOfString:@"\\t" withString:@"\t" + options:NSLiteralSearch + range:NSMakeRange(0, [tempInputString length])]; + [tempInputString replaceOccurrencesOfString:@"\\n" withString:@"\n" + options:NSLiteralSearch + range:NSMakeRange(0, [tempInputString length])]; + [tempInputString replaceOccurrencesOfString:@"\\r" withString:@"\r" + options:NSLiteralSearch + range:NSMakeRange(0, [tempInputString length])]; + switch (i) { + case 0: fieldEndString = [NSString stringWithString:tempInputString]; break; + case 1: fieldQuoteString = [NSString stringWithString:tempInputString]; break; + case 2: escapeString = [NSString stringWithString:tempInputString]; break; + case 3: lineEndString = [NSString stringWithString:tempInputString]; break; } - - // Skip blank lines - if (![tempString length]) continue; - // Add the line to the array - [linesArray addObject:[NSString stringWithString:tempString]]; } + fieldEndLength = [fieldEndString length]; + fieldQuoteLength = [fieldQuoteString length]; + escapeLength = [escapeString length]; + lineEndLength = [lineEndString length]; + csvLength = [csv length]; + escapeStringIsFieldQuoteString = [fieldQuoteString isEqualToString:escapeString]; + escapedFieldEndString = [NSString stringWithFormat:@"%@%@", escapeString, fieldEndString]; + escapedFieldQuoteString = [NSString stringWithFormat:@"%@%@", escapeString, fieldQuoteString]; + escapedEscapeString = [NSString stringWithFormat:@"%@%@", escapeString, escapeString]; + escapedLineEndString = [NSString stringWithFormat:@"%@%@", escapeString, lineEndString]; + + // Set up characters it should be possible to trim + [tempInputString setString:@""]; + if (![fieldEndString isEqualToString:@" "] && ![fieldQuoteString isEqualToString:@" "] && ![escapeString isEqualToString:@" "] && ![lineEndString isEqualToString:@" "]) + [tempInputString appendString:@" "]; + if (![fieldEndString isEqualToString:@"\t"] && ![fieldQuoteString isEqualToString:@"\t"] && ![escapeString isEqualToString:@"\t"] && ![lineEndString isEqualToString:@"\t"]) + [tempInputString appendString:@"\t"]; + if ([tempInputString length]) whitespaceSet = [NSCharacterSet characterSetWithCharactersInString:tempInputString]; + + // Set up the scanner to process the CSV + scanner = [[NSScanner alloc] initWithString:csv]; + [scanner setCharactersToBeSkipped:nil]; - for ( x = 0 ; x < [linesArray count] && !progressCancelled; x++ ) { + while ( ![scanner isAtEnd] && !progressCancelled) { - //separate fields + // Scan the string line by line into an array for each row. + processingLine = YES; [tempRowArray removeAllObjects]; - [tempRowArray addObjectsFromArray:[self arrayForString:[linesArray objectAtIndex:x] enclosed:enclosed escaped:escaped terminated:terminated]]; - if ( x == 0 ) { - fieldCount = [tempRowArray count]; - } else { - while ( [tempRowArray count] < fieldCount ) { - [tempRowArray addObject:[NSString stringWithString:[prefs objectForKey:@"NullValue"]]]; - } - } - for ( i = 0 ; i < [tempRowArray count] ; i++ ) { - - // Insert a NSNull object if the cell contains an unescaped null character or an unescaped string - // which matches the NULL string set in preferences. - if ( [[tempRowArray objectAtIndex:i] isEqualToString:@"\\N"] || [[tempRowArray objectAtIndex:i] isEqualToString:[prefs objectForKey:@"NullValue"]] ) { - [tempRowArray replaceObjectAtIndex:i withObject:[NSNull null]]; - - } else { - - //strip enclosed and escaped characters - mutableField = [NSMutableString stringWithString:[tempRowArray objectAtIndex:i]]; - - //strip enclosed characters - if ( [mutableField length] >= (2*[enclosed length]) ) { - if ( [[mutableField substringToIndex:[enclosed length]] isEqualToString:enclosed] ) { - [mutableField deleteCharactersInRange:NSMakeRange(0,[enclosed length])]; - } - if ( [[mutableField substringFromIndex:([mutableField length]-[enclosed length])] isEqualToString:enclosed] ) { - [mutableField deleteCharactersInRange:NSMakeRange(([mutableField length]-[enclosed length]),[enclosed length])]; + while (![scanner isAtEnd] && processingLine) { + [mutableField setString:@""]; + processingField = YES; + fieldWasQuoted = NO; + + // Skip unescaped, unquoted whitespace where possible + if (whitespaceSet) [scanner scanCharactersFromSet:whitespaceSet intoString:nil]; + + i = [scanner scanLocation]; + + // Look at the next section of the string, and determine whether it's enclosed in the field quote string + if (fieldQuoteLength && i + fieldQuoteLength <= csvLength + && [[csv substringWithRange:NSMakeRange(i, fieldQuoteLength)] isEqualToString:fieldQuoteString]) + { + [scanner setScanLocation:i+fieldQuoteLength]; + fieldWasQuoted = YES; + + while (![scanner isAtEnd] && processingField) { + + // Process the field until the next quote string + if (![scanner scanUpToString:fieldQuoteString intoString:&scanString]) scanString = @""; + [mutableField appendString:scanString]; + + // Check to see if the quote string encountered was escaped... or an escaper + if (escapeLength) { + j = 1; + isEscaped = NO; + if (!escapeStringIsFieldQuoteString) { + while (j * escapeLength <= [scanString length] + && ([[mutableField substringWithRange:NSMakeRange(([mutableField length] - (j*escapeLength)), escapeLength)] isEqualToString:escapeString])) + { + isEscaped = !isEscaped; + j++; + } + skipLength = fieldQuoteLength; + } else { + if ([scanner scanLocation] + (2 * fieldQuoteLength) <= csvLength + && [[csv substringWithRange:NSMakeRange([scanner scanLocation] + fieldQuoteLength, fieldQuoteLength)] isEqualToString:fieldQuoteString]) + { + isEscaped = YES; + skipLength = 2 * fieldQuoteLength; + } + } + + // If it was escaped, keep processing the field + if (isEscaped) { + if (![scanner isAtEnd]) { + [mutableField appendString:[csv substringWithRange:NSMakeRange([scanner scanLocation], skipLength)]]; + [scanner setScanLocation:[scanner scanLocation] + skipLength]; + } + continue; + } } - } - if ( [mutableField length] >= [enclosed length] ) { - if ( [[mutableField substringFromIndex:([mutableField length]-[enclosed length])] isEqualToString:enclosed] ) { - [mutableField deleteCharactersInRange:NSMakeRange(([mutableField length]-[enclosed length]),[enclosed length])]; + + // We should now be at the end of the field - but let the code below keep going until + // the field end character is actually reached. + if (![scanner isAtEnd]) { + [scanner setScanLocation:[scanner scanLocation] + fieldQuoteLength]; + if (whitespaceSet) [scanner scanCharactersFromSet:whitespaceSet intoString:nil]; } + processingField = NO; } - //strip escaped characters - if ( ![enclosed isEqualToString:@""] ) { - [mutableField replaceOccurrencesOfString:[NSString stringWithFormat:@"%@%@", escaped, enclosed] withString:enclosed options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + } + + // Process until the next field end string *or* line end string, ugh! + processingField = YES; + while (![scanner isAtEnd] && processingField) { + i = [scanner scanLocation]; + if (![scanner scanUpToString:lineEndString intoString:&stringToLineEnd]) stringToLineEnd = @""; + [scanner setScanLocation:i]; + if (![scanner scanUpToString:fieldEndString intoString:&stringToFieldEnd]) stringToFieldEnd = @""; + if ([stringToFieldEnd length] < [stringToLineEnd length]) { + scanString = stringToFieldEnd; + skipLength = fieldEndLength; } else { - [mutableField replaceOccurrencesOfString:[NSString stringWithFormat:@"%@%@", escaped, terminated] withString:terminated options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + [scanner setScanLocation:i + [stringToLineEnd length]]; + scanString = stringToLineEnd; + processingLine = NO; + skipLength = lineEndLength; } - if ( ![lineEnds isEqualToString:@""] ) { - [mutableField replaceOccurrencesOfString:[NSString stringWithFormat:@"%@%@", escaped, lineEnds] withString:lineEnds options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + [mutableField appendString:scanString]; + + // Check to see if the termination character was escaped + if (escapeLength) { + j = 1; + isEscaped = NO; + while (j * escapeLength <= [scanString length] + && ([[mutableField substringWithRange:NSMakeRange(([mutableField length] - (j*escapeLength)), escapeLength)] isEqualToString:escapeString])) + { + isEscaped = !isEscaped; + j++; + } + + // If it was, continue processing the field + if (isEscaped) { + if (![scanner isAtEnd]) { + [mutableField appendString:[csv substringWithRange:NSMakeRange([scanner scanLocation], skipLength)]]; + [scanner setScanLocation:[scanner scanLocation] + skipLength]; + } + continue; + } } - if ( ![escaped isEqualToString:@""] && ![escaped isEqualToString:enclosed] ) { - [mutableField replaceOccurrencesOfString:[NSString stringWithFormat:@"%@%@", escaped, escaped] withString:escaped options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + + // We should be at the end of the field. + if (![scanner isAtEnd]) [scanner setScanLocation:[scanner scanLocation] + skipLength]; + processingField = NO; + } + + // We now have a field content string. + // Insert a NSNull object if the cell contains an unescaped null character or an unescaped string + // which matches the NULL string set in preferences. + if ([mutableField isEqualToString:@"\\N"] + || (!fieldWasQuoted && [mutableField isEqualToString:nullString])) + { + [tempRowArray addObject:[NSNull null]]; + } else { + + // Clean up escaped characters + if (escapeLength) { + if (fieldEndLength) + [mutableField replaceOccurrencesOfString:escapedFieldEndString withString:fieldEndString options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + if (fieldQuoteLength) + [mutableField replaceOccurrencesOfString:escapedFieldQuoteString withString:fieldQuoteString options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + if (lineEndLength) + [mutableField replaceOccurrencesOfString:escapedLineEndString withString:lineEndString options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; + [mutableField replaceOccurrencesOfString:escapedEscapeString withString:escapeString options:NSLiteralSearch range:NSMakeRange(0, [mutableField length])]; } - //add field to tempRowArray - [tempRowArray replaceObjectAtIndex:i withObject:[NSString stringWithString:mutableField]]; + + // Add the field to the row array + [tempRowArray addObject:[NSString stringWithString:mutableField]]; + } + } + + // Capture the length of the first row and ensure all other rows contain that many items + if (fieldCount == NSNotFound) { + fieldCount = [tempRowArray count]; + } else if ([tempRowArray count] < fieldCount) { + + // Skip empty rows + if ([tempRowArray count] == 0 + || ([tempRowArray count] == 1 && ([[tempRowArray objectAtIndex:0] isNSNull] || ![[tempRowArray objectAtIndex:0] length]))) + { + continue; } + + for (j = [tempRowArray count]; j < fieldCount; j++) [tempRowArray addObject:[NSNull null]]; } - //add row to tempArray + + // Add the row to the master output array [tempArray addObject:[NSArray arrayWithArray:tempRowArray]]; } - + [scanner release]; + return [NSArray arrayWithArray:tempArray]; } @@ -2297,6 +2400,31 @@ objectValueForTableColumn:(NSTableColumn *)aTableColumn } else if ([pathExtension isEqualToString:@"CSV"]) { [importFormatPopup selectItemWithTitle:@"CSV"]; [self changeFormat:self]; + + // Try to detect the line endings using "file" + NSTask *fileTask = [[NSTask alloc] init]; + NSPipe *filePipe = [[NSPipe alloc] init]; + + [fileTask setLaunchPath:@"/usr/bin/file"]; + [fileTask setArguments:[NSArray arrayWithObjects:@"-L", @"-b", [selectedFilenames objectAtIndex:0], nil]]; + [fileTask setStandardOutput:filePipe]; + NSFileHandle *fileHandle = [filePipe fileHandleForReading]; + + [fileTask launch]; + + NSString *fileCheckOutput = [[NSString alloc] initWithData:[fileHandle readDataToEndOfFile] encoding:NSASCIIStringEncoding]; + if (fileCheckOutput && [fileCheckOutput length]) { + NSString *lineEndingString = [fileCheckOutput stringByMatching:@"with ([A-Z]{2,4}) line terminators" capture:1L]; + if (!lineEndingString && [fileCheckOutput isMatchedByRegex:@"text"]) lineEndingString = @"LF"; + if (lineEndingString) { + if ([lineEndingString isEqualToString:@"LF"]) [importLinesTerminatedField setStringValue:@"\\n"]; + else if ([lineEndingString isEqualToString:@"CR"]) [importLinesTerminatedField setStringValue:@"\\r"]; + else if ([lineEndingString isEqualToString:@"CRLF"]) [importLinesTerminatedField setStringValue:@"\\r\\n"]; + } + } + + [fileTask release]; + [filePipe release]; } } |