diff options
author | Max <post@wickenrode.com> | 2015-03-07 20:48:55 +0100 |
---|---|---|
committer | Max <post@wickenrode.com> | 2015-03-07 20:48:55 +0100 |
commit | 96b765ffbcb6c7fda058fbe8028b2e128007134a (patch) | |
tree | a4ac6c9fb75b5b810f7c42f1d159937caa6c2dda /Source | |
parent | ed38f229a052a678d3a5022afd3806b1c3b434cf (diff) | |
download | sequelpro-96b765ffbcb6c7fda058fbe8028b2e128007134a.tar.gz sequelpro-96b765ffbcb6c7fda058fbe8028b2e128007134a.tar.bz2 sequelpro-96b765ffbcb6c7fda058fbe8028b2e128007134a.zip |
Added an internal algorithm for fuzzy string matching
* This works similar to a regex matching "abc" as /a.*b.*c/ (ie. all characters of $needle need to be contained in $haystack in the correct order but not neccesarily consecutive). Additionaly some unicode equivalencies are handled.
* Changed a tiny helper function from ObjC to plain C
Diffstat (limited to 'Source')
-rw-r--r-- | Source/SPStringAdditions.h | 19 | ||||
-rw-r--r-- | Source/SPStringAdditions.m | 101 |
2 files changed, 109 insertions, 11 deletions
diff --git a/Source/SPStringAdditions.h b/Source/SPStringAdditions.h index 3296326f..c6871e2a 100644 --- a/Source/SPStringAdditions.h +++ b/Source/SPStringAdditions.h @@ -81,4 +81,23 @@ static inline id NSMutableAttributedStringAttributeAtIndex(NSMutableAttributedSt - (CGFloat)levenshteinDistanceWithWord:(NSString *)stringB; +/** + * Checks if the string other is contained in self on a per-character basis. + * In regex-speak that would mean "abc" is matched as /a.*b.*c/ (not anchored). + * This is a SEARCH function, NOT a MATCHING function! + * Namely the following options will be applied when matching: + * NSCaseInsensitiveSearch|NSDiacriticInsensitiveSearch|NSDiacriticInsensitiveSearch + * Additionaly this method might match even when it should not. + * + * @param other String to match against self + * @param submatches Pass the pointer to a variable that will be set to an NSArray * + * of NSNumber *s of NSRanges. This will only be the case if + * the method also returns YES. The variable will not be modified + * otherwise. + * Pass NULL if you don't care for the ranges. + * The object will be set to autorelase. + * @return YES if self contains all characters from other in the order given in other + * @warning This method is NOT thread-safe (probably), NOT constant-time and DOES NOT check binary equivalence + */ +- (BOOL)nonConsecutivelySearchString:(NSString *)other matchingRanges:(NSArray **)submatches; @end diff --git a/Source/SPStringAdditions.m b/Source/SPStringAdditions.m index 29b4144d..0c623af7 100644 --- a/Source/SPStringAdditions.m +++ b/Source/SPStringAdditions.m @@ -31,11 +31,7 @@ #import "SPStringAdditions.h" #import "RegexKitLite.h" -@interface NSString (PrivateAPI) - -- (NSInteger)_smallestOf:(NSInteger)a andOf:(NSInteger)b andOf:(NSInteger)c; - -@end +static NSInteger _smallestOf(NSInteger a, NSInteger b, NSInteger c); @implementation NSString (SPStringAdditions) @@ -426,9 +422,9 @@ cost = ([stringA characterAtIndex:i - 1] == [stringB characterAtIndex:j - 1]) ? 0 : 1; d[j * n + i] = - [self _smallestOf:d[(j - 1) * n + i] + 1 - andOf:d[j * n + i - 1] + 1 - andOf:d[(j - 1) * n + i -1] + cost]; + _smallestOf(d[(j - 1) * n + i] + 1, + d[j * n + i - 1] + 1, + d[(j - 1) * n + i -1] + cost); } distance = d[n * m - 1]; @@ -468,10 +464,95 @@ } } +- (BOOL)nonConsecutivelySearchString:(NSString *)other matchingRanges:(NSArray **)submatches +{ + NSStringCompareOptions opts = NSCaseInsensitiveSearch|NSDiacriticInsensitiveSearch|NSWidthInsensitiveSearch; + BOOL recordMatches = (submatches != NULL); //for readability + NSRange selfRange = NSMakeRange(0, [self length]); + + //shortcut: * a longer string can never be contained in a shorter one. + // * nil can never match in a string. + // * an empty other can never match if self is non-empty + if(([other length] > [self length]) || (!other) || ([self length] && ![other length])) + return NO; + + //handle the simple case via the default algorithm + if ([self compare:other options:opts] == NSOrderedSame) { + if(recordMatches) { + *submatches = [NSArray arrayWithObject:[NSValue valueWithRange:selfRange]]; + } + return YES; + } + + // for now let's save the overhead of NSArray and NSValues + NSRange *tmpMatchStore = recordMatches? calloc([other length], sizeof(NSRange)) : NULL; + __block NSUInteger matchCount = 0; + __block NSRange searchRange = selfRange; + + //this looks a bit silly but is basically Apples approach to handling multibyte charsets + void (^it)(NSString *,NSRange,NSRange,BOOL *) = ^(NSString *substring,NSRange substringRange,NSRange enclosingRange,BOOL *stop) { + //look for the current character of other in the remaining part of self + NSRange found = [self rangeOfString:substring options:opts range:searchRange]; + if(found.location == NSNotFound) { + matchCount = 0; //reset match count to "no match" + *stop = YES; + return; + } + if(recordMatches) + tmpMatchStore[matchCount] = found; + matchCount++; + //move the next search past the current match + searchRange.location = found.location + found.length; + searchRange.length = [self length] - searchRange.location; + }; + + [other enumerateSubstringsInRange:NSMakeRange(0, [other length]) + options:NSStringEnumerationByComposedCharacterSequences + usingBlock:it]; + + if(matchCount && recordMatches) { + //we want to re-combine sub-matches that are actually consecutive + + //This algorithm uses a simple look-behind for merges: + // Object 1 checks if it continues object 0. If so, 1 and 0 will merge + // and be placed in the slot of 0 (slot 1 is now invalid). + // Then object 2 checks if it continues object 0. If it does not, it is + // placed in slot 1. + // Object 3 then checks if it continues object 1 and so on... + NSUInteger mergeTarget = 0; + for (NSUInteger i = 1; i < matchCount; i++ ) { + NSRange prev = tmpMatchStore[mergeTarget]; + NSRange this = tmpMatchStore[i]; + //if the previous match ends right before this match begins we can merge them + if(prev.location + prev.length == this.location) { + NSRange mergedRange = NSMakeRange(prev.location, prev.length+this.length); + tmpMatchStore[mergeTarget] = mergedRange; + } + //otherwise we have to move on to the next and make ourselves the new base + else { + if(++mergeTarget != i) + tmpMatchStore[mergeTarget] = this; + } + } + + NSMutableArray *combinedArray = [NSMutableArray arrayWithCapacity:mergeTarget+1]; + for (NSUInteger j = 0; j <= mergeTarget; j++) { + [combinedArray addObject:[NSValue valueWithRange:tmpMatchStore[j]]]; + } + + *submatches = combinedArray; + } + + free(tmpMatchStore); // free(NULL) is safe as per OS X man page + return (matchCount > 0); +} + +@end + /** * Returns the minimum of a, b and c. */ -- (NSInteger)_smallestOf:(NSInteger)a andOf:(NSInteger)b andOf:(NSInteger)c +static NSInteger _smallestOf(NSInteger a, NSInteger b, NSInteger c) { NSInteger min = a; @@ -481,5 +562,3 @@ return min; } - -@end |