aboutsummaryrefslogtreecommitdiffstats
path: root/Source
diff options
context:
space:
mode:
authorMax <post@wickenrode.com>2015-03-07 20:48:55 +0100
committerMax <post@wickenrode.com>2015-03-07 20:48:55 +0100
commit96b765ffbcb6c7fda058fbe8028b2e128007134a (patch)
treea4ac6c9fb75b5b810f7c42f1d159937caa6c2dda /Source
parented38f229a052a678d3a5022afd3806b1c3b434cf (diff)
downloadsequelpro-96b765ffbcb6c7fda058fbe8028b2e128007134a.tar.gz
sequelpro-96b765ffbcb6c7fda058fbe8028b2e128007134a.tar.bz2
sequelpro-96b765ffbcb6c7fda058fbe8028b2e128007134a.zip
Added an internal algorithm for fuzzy string matching
* This works similar to a regex matching "abc" as /a.*b.*c/ (ie. all characters of $needle need to be contained in $haystack in the correct order but not neccesarily consecutive). Additionaly some unicode equivalencies are handled. * Changed a tiny helper function from ObjC to plain C
Diffstat (limited to 'Source')
-rw-r--r--Source/SPStringAdditions.h19
-rw-r--r--Source/SPStringAdditions.m101
2 files changed, 109 insertions, 11 deletions
diff --git a/Source/SPStringAdditions.h b/Source/SPStringAdditions.h
index 3296326f..c6871e2a 100644
--- a/Source/SPStringAdditions.h
+++ b/Source/SPStringAdditions.h
@@ -81,4 +81,23 @@ static inline id NSMutableAttributedStringAttributeAtIndex(NSMutableAttributedSt
- (CGFloat)levenshteinDistanceWithWord:(NSString *)stringB;
+/**
+ * Checks if the string other is contained in self on a per-character basis.
+ * In regex-speak that would mean "abc" is matched as /a.*b.*c/ (not anchored).
+ * This is a SEARCH function, NOT a MATCHING function!
+ * Namely the following options will be applied when matching:
+ * NSCaseInsensitiveSearch|NSDiacriticInsensitiveSearch|NSDiacriticInsensitiveSearch
+ * Additionaly this method might match even when it should not.
+ *
+ * @param other String to match against self
+ * @param submatches Pass the pointer to a variable that will be set to an NSArray *
+ * of NSNumber *s of NSRanges. This will only be the case if
+ * the method also returns YES. The variable will not be modified
+ * otherwise.
+ * Pass NULL if you don't care for the ranges.
+ * The object will be set to autorelase.
+ * @return YES if self contains all characters from other in the order given in other
+ * @warning This method is NOT thread-safe (probably), NOT constant-time and DOES NOT check binary equivalence
+ */
+- (BOOL)nonConsecutivelySearchString:(NSString *)other matchingRanges:(NSArray **)submatches;
@end
diff --git a/Source/SPStringAdditions.m b/Source/SPStringAdditions.m
index 29b4144d..0c623af7 100644
--- a/Source/SPStringAdditions.m
+++ b/Source/SPStringAdditions.m
@@ -31,11 +31,7 @@
#import "SPStringAdditions.h"
#import "RegexKitLite.h"
-@interface NSString (PrivateAPI)
-
-- (NSInteger)_smallestOf:(NSInteger)a andOf:(NSInteger)b andOf:(NSInteger)c;
-
-@end
+static NSInteger _smallestOf(NSInteger a, NSInteger b, NSInteger c);
@implementation NSString (SPStringAdditions)
@@ -426,9 +422,9 @@
cost = ([stringA characterAtIndex:i - 1] == [stringB characterAtIndex:j - 1]) ? 0 : 1;
d[j * n + i] =
- [self _smallestOf:d[(j - 1) * n + i] + 1
- andOf:d[j * n + i - 1] + 1
- andOf:d[(j - 1) * n + i -1] + cost];
+ _smallestOf(d[(j - 1) * n + i] + 1,
+ d[j * n + i - 1] + 1,
+ d[(j - 1) * n + i -1] + cost);
}
distance = d[n * m - 1];
@@ -468,10 +464,95 @@
}
}
+- (BOOL)nonConsecutivelySearchString:(NSString *)other matchingRanges:(NSArray **)submatches
+{
+ NSStringCompareOptions opts = NSCaseInsensitiveSearch|NSDiacriticInsensitiveSearch|NSWidthInsensitiveSearch;
+ BOOL recordMatches = (submatches != NULL); //for readability
+ NSRange selfRange = NSMakeRange(0, [self length]);
+
+ //shortcut: * a longer string can never be contained in a shorter one.
+ // * nil can never match in a string.
+ // * an empty other can never match if self is non-empty
+ if(([other length] > [self length]) || (!other) || ([self length] && ![other length]))
+ return NO;
+
+ //handle the simple case via the default algorithm
+ if ([self compare:other options:opts] == NSOrderedSame) {
+ if(recordMatches) {
+ *submatches = [NSArray arrayWithObject:[NSValue valueWithRange:selfRange]];
+ }
+ return YES;
+ }
+
+ // for now let's save the overhead of NSArray and NSValues
+ NSRange *tmpMatchStore = recordMatches? calloc([other length], sizeof(NSRange)) : NULL;
+ __block NSUInteger matchCount = 0;
+ __block NSRange searchRange = selfRange;
+
+ //this looks a bit silly but is basically Apples approach to handling multibyte charsets
+ void (^it)(NSString *,NSRange,NSRange,BOOL *) = ^(NSString *substring,NSRange substringRange,NSRange enclosingRange,BOOL *stop) {
+ //look for the current character of other in the remaining part of self
+ NSRange found = [self rangeOfString:substring options:opts range:searchRange];
+ if(found.location == NSNotFound) {
+ matchCount = 0; //reset match count to "no match"
+ *stop = YES;
+ return;
+ }
+ if(recordMatches)
+ tmpMatchStore[matchCount] = found;
+ matchCount++;
+ //move the next search past the current match
+ searchRange.location = found.location + found.length;
+ searchRange.length = [self length] - searchRange.location;
+ };
+
+ [other enumerateSubstringsInRange:NSMakeRange(0, [other length])
+ options:NSStringEnumerationByComposedCharacterSequences
+ usingBlock:it];
+
+ if(matchCount && recordMatches) {
+ //we want to re-combine sub-matches that are actually consecutive
+
+ //This algorithm uses a simple look-behind for merges:
+ // Object 1 checks if it continues object 0. If so, 1 and 0 will merge
+ // and be placed in the slot of 0 (slot 1 is now invalid).
+ // Then object 2 checks if it continues object 0. If it does not, it is
+ // placed in slot 1.
+ // Object 3 then checks if it continues object 1 and so on...
+ NSUInteger mergeTarget = 0;
+ for (NSUInteger i = 1; i < matchCount; i++ ) {
+ NSRange prev = tmpMatchStore[mergeTarget];
+ NSRange this = tmpMatchStore[i];
+ //if the previous match ends right before this match begins we can merge them
+ if(prev.location + prev.length == this.location) {
+ NSRange mergedRange = NSMakeRange(prev.location, prev.length+this.length);
+ tmpMatchStore[mergeTarget] = mergedRange;
+ }
+ //otherwise we have to move on to the next and make ourselves the new base
+ else {
+ if(++mergeTarget != i)
+ tmpMatchStore[mergeTarget] = this;
+ }
+ }
+
+ NSMutableArray *combinedArray = [NSMutableArray arrayWithCapacity:mergeTarget+1];
+ for (NSUInteger j = 0; j <= mergeTarget; j++) {
+ [combinedArray addObject:[NSValue valueWithRange:tmpMatchStore[j]]];
+ }
+
+ *submatches = combinedArray;
+ }
+
+ free(tmpMatchStore); // free(NULL) is safe as per OS X man page
+ return (matchCount > 0);
+}
+
+@end
+
/**
* Returns the minimum of a, b and c.
*/
-- (NSInteger)_smallestOf:(NSInteger)a andOf:(NSInteger)b andOf:(NSInteger)c
+static NSInteger _smallestOf(NSInteger a, NSInteger b, NSInteger c)
{
NSInteger min = a;
@@ -481,5 +562,3 @@
return min;
}
-
-@end