diff options
Diffstat (limited to 'Source/SPFieldMapperController.m')
-rw-r--r-- | Source/SPFieldMapperController.m | 111 |
1 files changed, 89 insertions, 22 deletions
diff --git a/Source/SPFieldMapperController.m b/Source/SPFieldMapperController.m index e70a7c25..b77ca98e 100644 --- a/Source/SPFieldMapperController.m +++ b/Source/SPFieldMapperController.m @@ -29,6 +29,7 @@ #import "SPTextView.h" #import "SPTableView.h" #import "SPCategoryAdditions.h" +#import "RegexKitLite.h" #define SP_NUMBER_OF_RECORDS_STRING NSLocalizedString(@"%ld of %@%lu records", @"Label showing the index of the selected CSV row") @@ -120,7 +121,7 @@ static const NSString *SPTableViewSqlColumnID = @"sql"; [pc setURL:[NSURL fileURLWithPath:sourcePath]]; if([pc pathComponentCells]) [fileSourcePath setPathComponentCells:[pc pathComponentCells]]; - [fileSourcePath setDoubleAction:@selector(goBackToFileChooser:)]; + [fileSourcePath setDoubleAction:@selector(goBackToFileChooserFromPathControl:)]; [onupdateTextView setDelegate:theDelegate]; windowMinWidth = [[self window] minSize].width; @@ -700,15 +701,24 @@ static const NSString *SPTableViewSqlColumnID = @"sql"; if(possibleImports < 1) return; + // Set all operators to doNotImport + [fieldMappingOperatorArray removeAllObjects]; + for(i=0; i < [fieldMappingTableColumnNames count]; i++) + [fieldMappingOperatorArray addObject:doNotImport]; + switch([[alignByPopup selectedItem] tag]) { case 0: // file order - for(i=0; i<possibleImports; i++) + for(i=0; i<possibleImports; i++) { [fieldMappingArray replaceObjectAtIndex:i withObject:[NSNumber numberWithInteger:i]]; + [fieldMappingOperatorArray replaceObjectAtIndex:i withObject:doImport]; + } break; case 1: // reversed file order possibleImports--; - for(i=possibleImports; i>=0; i--) + for(i=possibleImports; i>=0; i--) { [fieldMappingArray replaceObjectAtIndex:possibleImports-i withObject:[NSNumber numberWithInteger:i]]; + [fieldMappingOperatorArray replaceObjectAtIndex:possibleImports-i withObject:doImport]; + } break; case 2: // try to align header and table target field names via Levenshtein distance [self matchHeaderNames]; @@ -763,14 +773,21 @@ static const NSString *SPTableViewSqlColumnID = @"sql"; } } +- (IBAction)goBackToFileChooserFromPathControl:(id)sender +{ + [gobackButton performSelector:@selector(performClick:) withObject:nil afterDelay:0.0f]; +} + - (IBAction)goBackToFileChooser:(id)sender { + [NSApp endSheet:[self window] returnCode:[sender tag]]; - if([sourcePath hasPrefix:SPImportClipboardTempFileNamePrefix]) { + + if([sourcePath hasPrefix:SPImportClipboardTempFileNamePrefix]) [theDelegate importFromClipboard]; - } else { + else [theDelegate importFile]; - } + } - (IBAction)newTable:(id)sender @@ -1365,34 +1382,84 @@ static const NSString *SPTableViewSqlColumnID = @"sql"; NSMutableArray *tableHeaderNames = [NSMutableArray array]; [tableHeaderNames setArray:fieldMappingTableColumnNames]; + // Create a distance matrix for each file-table name + // distance will be calculated by using Levenshtein distance minus common prefix and suffix length + // and minus the length of a fuzzy regex search for a common sequence of characters NSInteger i,j; - NSMutableArray *matchedHeaderNames = [NSMutableArray array]; + NSMutableArray *distMatrix = [NSMutableArray array]; for(i=0; i < [tableHeaderNames count]; i++) { - CGFloat minDist = 1e6; + CGFloat minDist = 1e6; NSInteger minIndex = 0; + CGFloat dist = 1e6; for(j=0; j < [fileHeaderNames count]; j++) { id fileHeaderName = NSArrayObjectAtIndex(fileHeaderNames,j); if([fileHeaderName isKindOfClass:[NSNull class]] || [fileHeaderName isSPNotLoaded]) continue; NSString *headerName = [(NSString*)fileHeaderName lowercaseString]; - CGFloat dist = [[NSArrayObjectAtIndex(tableHeaderNames,i) lowercaseString] levenshteinDistanceWithWord:headerName]; - if(dist < minDist && ![matchedHeaderNames containsObject:headerName]) { - minDist = dist; - minIndex = j; + NSString *tableHeadName = [NSArrayObjectAtIndex(tableHeaderNames,i) lowercaseString]; + dist = [tableHeadName levenshteinDistanceWithWord:headerName]; + + // if dist > 0 subtract the length of common prefixes, suffixes, and in common sequence characters + if(dist > 0.0) { + dist -= [[tableHeadName commonPrefixWithString:headerName options:NSCaseInsensitiveSearch] length]; + dist -= [[tableHeadName commonPrefixWithString:headerName options:NSCaseInsensitiveSearch|NSBackwardsSearch] length]; + + NSMutableString *fuzzyRegexp = [[NSMutableString alloc] initWithCapacity:3]; + NSInteger i; + unichar c; + + for(i=0; i<[headerName length]; i++) { + c = [headerName characterAtIndex:i]; + if (c == '.' || c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}') + [fuzzyRegexp appendFormat:@".*?\\%c",c]; + else + [fuzzyRegexp appendFormat:@".*?%c",c]; + } + dist -= [tableHeadName rangeOfRegex:fuzzyRegexp].length; + [fuzzyRegexp release]; + + } else { + // Levenshtein distance == 0 means that both names are equal set dist to + // a large negative number since dist can be negative due to search for in common chars + dist = -1e6; } - if(dist == 0.0f) [matchedHeaderNames addObject:headerName]; + + [distMatrix addObject:[NSDictionary dictionaryWithObjectsAndKeys: + [NSNumber numberWithFloat:dist], @"dist", + NSStringFromRange(NSMakeRange(i,j)), @"match", + (NSString*)fileHeaderName, @"file", + NSArrayObjectAtIndex(tableHeaderNames,i), @"table", + nil]]; + } - [fieldMappingArray replaceObjectAtIndex:i withObject:[NSNumber numberWithInteger:minIndex]]; - [fieldMappingOperatorArray replaceObjectAtIndex:i withObject:doImport]; + } - // If a pair with distance 0 was found set doNotImport to those fields which are still mapped - // to such csv file header name - if([matchedHeaderNames count]) - for(i=0; i < [tableHeaderNames count]; i++) { - NSString *mappedFileHeaderName = [NSArrayObjectAtIndex(fileHeaderNames, [[fieldMappingArray objectAtIndex:i] integerValue]) lowercaseString]; - if([matchedHeaderNames containsObject:mappedFileHeaderName] && ![mappedFileHeaderName isEqualToString:[NSArrayObjectAtIndex(tableHeaderNames, i) lowercaseString]]) - [fieldMappingOperatorArray replaceObjectAtIndex:i withObject:doNotImport]; + // Sort the matrix according distance + NSSortDescriptor *sortByDistance = [[[NSSortDescriptor alloc] initWithKey:@"dist" ascending:TRUE] autorelease]; + [distMatrix sortUsingDescriptors:[NSArray arrayWithObjects:sortByDistance, nil]]; + + NSMutableArray *matchedFile = [NSMutableArray array]; + NSMutableArray *matchedTable = [NSMutableArray array]; + NSInteger cnt = 0; + for(NSDictionary* m in distMatrix) { + if(![matchedFile containsObject:[m objectForKey:@"file"]] && ![matchedTable containsObject:[m objectForKey:@"table"]]) { + + NSRange match = NSRangeFromString([m objectForKey:@"match"]); + + // Set best match + [fieldMappingArray replaceObjectAtIndex:match.location withObject:[NSNumber numberWithInteger:match.length]]; + [fieldMappingOperatorArray replaceObjectAtIndex:match.location withObject:doImport]; + + // Remember matched pair + [matchedTable addObject:[m objectForKey:@"table"]]; + [matchedFile addObject:[m objectForKey:@"file"]]; + cnt++; } + + // break if all file names are mapped + if(cnt >= [fileHeaderNames count]) break; + + } } /* |