diff options
author | rowanbeentje <rowan@beent.je> | 2010-07-29 00:57:11 +0000 |
---|---|---|
committer | rowanbeentje <rowan@beent.je> | 2010-07-29 00:57:11 +0000 |
commit | 67bebbe2f9e6222c0a33be77c2d04495026f2437 (patch) | |
tree | 5ce673e3c9307a78686195d054f80205ac6c02a9 /Source | |
parent | 550c112216171aa81487b32c41d7aa64adc2ec7f (diff) | |
download | sequelpro-67bebbe2f9e6222c0a33be77c2d04495026f2437.tar.gz sequelpro-67bebbe2f9e6222c0a33be77c2d04495026f2437.tar.bz2 sequelpro-67bebbe2f9e6222c0a33be77c2d04495026f2437.zip |
- Add support for automatically converting \r and \r\n linebreaks in query syntax to \n. This addresses Issue #652.
- Clean up SPSQLParser, and use a few more CFString methods to avoid obj-c messaging in loops
- When importing SQL using autodetect, perform a manual SET NAMES if possible to cope with poorly-exported files.
Diffstat (limited to 'Source')
-rw-r--r-- | Source/SPCustomQuery.m | 28 | ||||
-rw-r--r-- | Source/SPDataImport.m | 24 | ||||
-rw-r--r-- | Source/SPSQLParser.h | 72 | ||||
-rw-r--r-- | Source/SPSQLParser.m | 403 |
4 files changed, 367 insertions, 160 deletions
diff --git a/Source/SPCustomQuery.m b/Source/SPCustomQuery.m index 520370d2..ec5f8347 100644 --- a/Source/SPCustomQuery.m +++ b/Source/SPCustomQuery.m @@ -72,11 +72,23 @@ queryParser = [[SPSQLParser alloc] initWithString:[textView string]]; [queryParser setDelimiterSupport:YES]; queries = [queryParser splitStringByCharacter:';']; + + // If carriage returns were found, normalise the queries + if ([queryParser containsCarriageReturns]) { + NSMutableArray *normalisedQueries = [NSMutableArray arrayWithCapacity:[queries count]]; + for (NSString *query in queries) { + [normalisedQueries addObject:[SPSQLParser normaliseQueryForExecution:query]]; + } + queries = normalisedQueries; + } + [queryParser release]; oldThreadedQueryRange = [textView selectedRange]; - // Unselect a selection if given to avoid interferring with error highlighting + + // Unselect a selection if given to avoid interfering with error highlighting [textView setSelectedRange:NSMakeRange(oldThreadedQueryRange.location, 0)]; + // Reset queryStartPosition queryStartPosition = 0; @@ -132,7 +144,7 @@ NSBeep(); return; } - queries = [NSArray arrayWithObject:query]; + queries = [NSArray arrayWithObject:[SPSQLParser normaliseQueryForExecution:query]]; // Remember query start position for error highlighting queryTextViewStartPosition = currentQueryRange.location; @@ -142,6 +154,16 @@ queryParser = [[SPSQLParser alloc] initWithString:[[textView string] substringWithRange:selectedRange]]; [queryParser setDelimiterSupport:YES]; queries = [queryParser splitStringByCharacter:';']; + + // If carriage returns were found, normalise the queries + if ([queryParser containsCarriageReturns]) { + NSMutableArray *normalisedQueries = [NSMutableArray arrayWithCapacity:[queries count]]; + for (NSString *query in queries) { + [normalisedQueries addObject:[SPSQLParser normaliseQueryForExecution:query]]; + } + queries = normalisedQueries; + } + [queryParser release]; // Remember query start position for error highlighting @@ -551,7 
+573,7 @@ if (i > 0) { NSString *taskString = [NSString stringWithFormat:NSLocalizedString(@"Running query %ld of %lu...", @"Running multiple queries string"), (long)(i+1), (unsigned long)queryCount]; - [tableDocumentInstance setTaskDescription:taskString]; + [[tableDocumentInstance onMainThread] setTaskDescription:taskString]; [[errorText onMainThread] setStringValue:taskString]; } diff --git a/Source/SPDataImport.m b/Source/SPDataImport.m index 6b20787a..e697ec18 100644 --- a/Source/SPDataImport.m +++ b/Source/SPDataImport.m @@ -358,6 +358,7 @@ BOOL importSQLAsUTF8 = YES; BOOL allDataRead = NO; NSStringEncoding sqlEncoding = NSUTF8StringEncoding; + NSString *connectionEncodingToRestore = nil; NSCharacterSet *whitespaceAndNewlineCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet]; // Start the notification timer to allow notifications to be shown, even if frontmost, for long queries @@ -404,6 +405,10 @@ [fileEncodingDetector analyzeData:[detectorFileHandle readDataOfLength:2500000]]; sqlEncoding = [fileEncodingDetector encoding]; [fileEncodingDetector release]; + if ([MCPConnection mySQLEncodingForStringEncoding:sqlEncoding]) { + connectionEncodingToRestore = [tableDocumentInstance connectionEncoding]; + [mySQLConnection queryString:[NSString stringWithFormat:@"SET NAMES '%@'", [MCPConnection mySQLEncodingForStringEncoding:sqlEncoding]]]; + } } // Otherwise, get the encoding to use from the menu @@ -425,6 +430,9 @@ // Report file read errors, and bail @catch (NSException *exception) { + if (connectionEncodingToRestore) { + [mySQLConnection queryString:[NSString stringWithFormat:@"SET NAMES '%@'", connectionEncodingToRestore]]; + } [self closeAndStopProgressSheet]; SPBeginAlertSheet(NSLocalizedString(@"File read error", @"SQL read error title"), NSLocalizedString(@"OK", @"OK button"), @@ -466,6 +474,9 @@ sqlString = [[NSString alloc] initWithData:[sqlDataBuffer subdataWithRange:NSMakeRange(dataBufferLastQueryEndPosition, dataBufferPosition - 
dataBufferLastQueryEndPosition)] encoding:sqlEncoding]; if (!sqlString) { + if (connectionEncodingToRestore) { + [mySQLConnection queryString:[NSString stringWithFormat:@"SET NAMES '%@'", connectionEncodingToRestore]]; + } [self closeAndStopProgressSheet]; NSString *displayEncoding; if (![importEncodingPopup indexOfSelectedItem]) { @@ -522,9 +533,15 @@ while (query = [sqlParser trimAndReturnStringToCharacter:';' trimmingInclusively:YES returningInclusively:NO]) { if (progressCancelled) break; fileProcessedLength += [query lengthOfBytesUsingEncoding:sqlEncoding] + 1; - + + // Ensure whitespace is removed from both ends, and normalise if necessary. + if ([sqlParser containsCarriageReturns]) { + query = [SPSQLParser normaliseQueryForExecution:query]; + } else { + query = [query stringByTrimmingCharactersInSet:whitespaceAndNewlineCharset]; + } + // Skip blank or whitespace-only queries to avoid errors - query = [query stringByTrimmingCharactersInSet:whitespaceAndNewlineCharset]; if (![query length]) continue; // Run the query @@ -575,6 +592,9 @@ } // Clean up + if (connectionEncodingToRestore) { + [mySQLConnection queryString:[NSString stringWithFormat:@"SET NAMES '%@'", connectionEncodingToRestore]]; + } [sqlParser release]; [sqlDataBuffer release]; [importPool drain]; diff --git a/Source/SPSQLParser.h b/Source/SPSQLParser.h index 1331d60f..bbdefcd8 100644 --- a/Source/SPSQLParser.h +++ b/Source/SPSQLParser.h @@ -65,6 +65,7 @@ NSInteger charCacheStart; NSInteger charCacheEnd; BOOL ignoreCommentStrings; + BOOL containsCRs; BOOL supportDelimiters; NSString *delimiter; @@ -78,7 +79,14 @@ typedef enum _SPCommentTypes { SPCStyleComment = 2 } SPCommentType; -/* +/** + * Return whether any carriage returns have been encountered during + * parsing; quoted strings are not included. May be used to determine + * whether text needs to be normalised. + */ +- (BOOL)containsCarriageReturns; + +/** * Set whether comment strings should be ignored during parsing. 
* Normally, comment strings are treated as dead space and ignored; * for certain parsing operations, characters within comments need @@ -87,7 +95,7 @@ typedef enum _SPCommentTypes { */ - (void) setIgnoreCommentStrings:(BOOL)ignoringCommentStrings; -/* +/** * Set whether DELIMITER support should be enabled while parsing. * This is off by default; when switched on, delimiters commands will * be parsed out and not returned to the calling class, and any active @@ -96,12 +104,12 @@ typedef enum _SPCommentTypes { */ - (void) setDelimiterSupport:(BOOL)shouldSupportDelimiters; -/* +/** * Removes comments within the current string, trimming "#", "--[/s]", and "⁄* *⁄" style strings. */ - (void) deleteComments; -/* +/** * Removes quotes surrounding the string if present, and un-escapes internal occurrences of the quote character, * before returning the resulting string. * If no quotes surround the current string, return the entire string; if the current string contains several @@ -109,7 +117,13 @@ typedef enum _SPCommentTypes { */ - (NSString *) unquotedString; -/* +/** + * Normalise a string, readying it for queries - trims whitespace from both + * ends, and ensures line endings which aren't in quotes are LF. + */ ++ (NSString *) normaliseQueryForExecution:(NSString *)queryString; + +/** * Removes characters from the string up to the first occurrence of the supplied character. * "inclusively" controls whether the supplied character is also removed. * Quoted strings are automatically ignored when looking for the character. @@ -118,13 +132,13 @@ typedef enum _SPCommentTypes { */ - (BOOL) trimToCharacter:(unichar)character inclusively:(BOOL)inclusive; -/* +/** * As trimToCharacter: ..., but allows control over whether characters within quoted strings * are ignored. 
*/ - (BOOL) trimToCharacter:(unichar)character inclusively:(BOOL)inclusive ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * Returns an NSString containing characters from the string up to the first occurrence of the supplied character. * "inclusively" controls whether the supplied character is also returned. * Quoted strings are automatically ignored when looking for the character. @@ -133,13 +147,13 @@ typedef enum _SPCommentTypes { */ - (NSString *) stringToCharacter:(unichar)character inclusively:(BOOL)inclusive; -/* +/** * As stringToCharacter: ..., but allows control over whether characters within quoted strings * are ignored. */ - (NSString *) stringToCharacter:(unichar)character inclusively:(BOOL)inclusive ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * Returns an NSString containing characters from the string up to the first occurrence of the supplied * character, also removing them from the string. "trimmingInclusively" controls whether or not the * supplied character is removed from the string on a successful match, while "returningInclusively" @@ -150,13 +164,13 @@ typedef enum _SPCommentTypes { */ - (NSString *) trimAndReturnStringToCharacter:(unichar)character trimmingInclusively:(BOOL)inclusiveTrim returningInclusively:(BOOL)inclusiveReturn; -/* +/** * As trimAndReturnStringToCharacter: ..., but allows control over whether characters within quoted * strings are ignored. */ - (NSString *) trimAndReturnStringToCharacter:(unichar)character trimmingInclusively:(BOOL)inclusiveTrim returningInclusively:(BOOL)inclusiveReturn ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * Returns characters from the string up to and from the first occurrence of the supplied opening character * to the appropriate occurrence of the supplied closing character. "inclusively" controls whether the supplied * characters should also be returned. 
@@ -166,24 +180,24 @@ typedef enum _SPCommentTypes { */ - (NSString *) stringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter inclusively:(BOOL)inclusive; -/* +/** * As stringFromCharacter: toCharacter: ..., but allows control over whether to skip * over bracket-enclosed characters, as in subqueries, enums, definitions or groups */ - (NSString *) stringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter inclusively:(BOOL)inclusive skippingBrackets:(BOOL)skipBrackets; -/* +/** * As stringFromCharacter: toCharacter: ..., but allows control over whether characters within quoted * strings are ignored. */ - (NSString *) stringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter inclusively:(BOOL)inclusive ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * As stringFromCharacter: toCharacter: ..., but allows control over both bracketing and quoting. */ - (NSString *) stringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter inclusively:(BOOL)inclusive skippingBrackets:(BOOL)skipBrackets ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * As stringFromCharacter: toCharacter: ..., but also trims the string up to the "to" character and * up to or including the "from" character, depending on whether "trimmingInclusively" is set. * "returningInclusively" controls whether the supplied characters should also be returned. @@ -191,25 +205,25 @@ typedef enum _SPCommentTypes { */ - (NSString *) trimAndReturnStringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter trimmingInclusively:(BOOL)inclusiveTrim returningInclusively:(BOOL)inclusiveReturn; -/* +/** * As trimAndReturnStringFromCharacter: toCharacter: ..., but allows control over whether to * skip over bracket-enclosed characters, as in subqueries, enums, definitions or groups. 
*/ - (NSString *) trimAndReturnStringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter trimmingInclusively:(BOOL)inclusiveTrim returningInclusively:(BOOL)inclusiveReturn skippingBrackets:(BOOL)skipBrackets; -/* +/** * As trimAndReturnStringFromCharacter: toCharacter: ..., but allows control over whether characters * within quoted strings are ignored. */ - (NSString *) trimAndReturnStringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter trimmingInclusively:(BOOL)inclusiveTrim returningInclusively:(BOOL)inclusiveReturn ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * As trimAndReturnStringFromCharacter: toCharacter: ..., but allows control over both bracketing * and quoting. */ - (NSString *) trimAndReturnStringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter trimmingInclusively:(BOOL)inclusiveTrim returningInclusively:(BOOL)inclusiveReturn skippingBrackets:(BOOL)skipBrackets ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * Split a string on the boundaries formed by the supplied character, returning an array of strings. * Quoted strings are automatically ignored when looking for the characters. * SQL comments are automatically ignored when looking for the characters. @@ -217,24 +231,24 @@ typedef enum _SPCommentTypes { */ - (NSArray *) splitStringByCharacter:(unichar)character; -/* +/** * As splitStringByCharacter: ..., but allows control over whether to skip over bracket-enclosed * characters, as in subqueries, enums, definitions or groups. */ - (NSArray *) splitStringByCharacter:(unichar)character skippingBrackets:(BOOL)skipBrackets; -/* +/** * As splitStringByCharacter:, but allows control over whether characters * within quoted strings are ignored. */ - (NSArray *) splitStringByCharacter:(unichar)character ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * As splitStringByCharacter: ..., but allows control over both bracketing and quoting. 
*/ - (NSArray *) splitStringByCharacter:(unichar)character skippingBrackets:(BOOL)skipBrackets ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; -/* +/** * As splitStringByCharacter:, but returning only the ranges of queries, stored as NSValues. * Quoted strings are automatically ignored when looking for the characters. * SQL comments are automatically ignored when looking for the characters. @@ -242,7 +256,7 @@ typedef enum _SPCommentTypes { */ - (NSArray *) splitStringIntoRangesByCharacter:(unichar)character; -/* +/** * Methods used internally by this class to power the methods above: */ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character ignoringQuotedStrings:(BOOL)ignoreQuotedStrings; @@ -251,14 +265,6 @@ typedef enum _SPCommentTypes { - (NSUInteger) endIndexOfStringQuotedByCharacter:(unichar)quoteCharacter startingAtIndex:(NSInteger)index; - (NSUInteger) endIndexOfCommentOfType:(SPCommentType)commentType startingAtIndex:(NSInteger)index; -/* - * Cacheing methods to enable a faster alternative to characterAtIndex: when walking strings, and overrides to update. 
- */ -- (unichar) charAtIndex:(NSInteger)index; -- (void) clearCharCache; -- (void) deleteCharactersInRange:(NSRange)aRange; -- (void) insertString:(NSString *)aString atIndex:(NSUInteger)anIndex; - /* Required and primitive methods to allow subclassing class cluster */ #pragma mark - - (id) init; @@ -278,6 +284,8 @@ typedef enum _SPCommentTypes { - (NSUInteger) replaceOccurrencesOfString:(NSString *)target withString:(NSString *)replacement options:(NSUInteger)opts range:(NSRange)searchRange; - (void) setString:(NSString *)string; - (void) replaceCharactersInRange:(NSRange)range withString:(NSString *)string; +- (void) deleteCharactersInRange:(NSRange)aRange; +- (void) insertString:(NSString *)aString atIndex:(NSUInteger)anIndex; - (void) dealloc; @end diff --git a/Source/SPSQLParser.m b/Source/SPSQLParser.m index f8112767..fb125f43 100644 --- a/Source/SPSQLParser.m +++ b/Source/SPSQLParser.m @@ -26,7 +26,7 @@ #import "SPSQLParser.h" #import "RegexKitLite.h" -/* +/** * Include all the extern variables and prototypes required for flex (used for syntax highlighting) */ #import "SPSQLTokenizer.h" @@ -36,13 +36,37 @@ typedef struct to_buffer_state *TO_BUFFER_STATE; void to_switch_to_buffer(TO_BUFFER_STATE); TO_BUFFER_STATE to_scan_string (const char *); -/* +@interface SPSQLParser (PrivateAPI) + +- (unichar) _charAtIndex:(NSInteger)index; +- (void) _clearCharCache; + +@end + + +/** * Please see the header files for a general description of the purpose of this class, * and increased overview detail for the functions below. */ @implementation SPSQLParser : NSMutableString -/* +#pragma mark - +#pragma mark Parser information + +/** + * Return whether any carriage returns have been encountered during + * parsing; quoted strings are not included. May be used to determine + * whether text needs to be normalised. 
+ */ +- (BOOL)containsCarriageReturns +{ + return containsCRs; +} + +#pragma mark - +#pragma mark Parser behaviour setting + +/** * Control whether comment strings should be skipped during parsing. */ - (void)setIgnoreCommentStrings:(BOOL)ignoringCommentStrings @@ -50,7 +74,7 @@ TO_BUFFER_STATE to_scan_string (const char *); ignoreCommentStrings = ignoringCommentStrings; } -/* +/** * Control whether DELIMITER commands are recognised and used to override * supported characters. */ @@ -59,8 +83,10 @@ TO_BUFFER_STATE to_scan_string (const char *); supportDelimiters = shouldSupportDelimiters; } +#pragma mark - +#pragma mark SQL-aware utility methods -/* +/** * Removes comments within the current string, trimming "#", "--[/s]", and "⁄* *⁄" style strings. */ - (void) deleteComments @@ -71,7 +97,7 @@ TO_BUFFER_STATE to_scan_string (const char *); // Walk along the string, processing characters. for (currentStringIndex = 0; currentStringIndex < stringLength; currentStringIndex++) { - currentCharacter = [string characterAtIndex:currentStringIndex]; + currentCharacter = CFStringGetCharacterAtIndex(string ,currentStringIndex); switch (currentCharacter) { // When quote characters are encountered walk to the end of the quoted string. @@ -88,8 +114,8 @@ TO_BUFFER_STATE to_scan_string (const char *); // For comments starting "--[\s]", ensure the start syntax is valid before proceeding. 
case '-': if (stringLength < currentStringIndex + 2) break; - if ([string characterAtIndex:currentStringIndex+1] != '-') break; - if (![[NSCharacterSet whitespaceCharacterSet] characterIsMember:[string characterAtIndex:currentStringIndex+2]]) break; + if (CFStringGetCharacterAtIndex(string, currentStringIndex+1) != '-') break; + if (![[NSCharacterSet whitespaceCharacterSet] characterIsMember:CFStringGetCharacterAtIndex(string, currentStringIndex+2)]) break; commentEndIndex = [self endIndexOfCommentOfType:SPDoubleDashComment startingAtIndex:currentStringIndex]; // Remove the comment @@ -110,7 +136,7 @@ TO_BUFFER_STATE to_scan_string (const char *); // For comments starting "/*", ensure the start syntax is valid before proceeding. case '/': if (stringLength < currentStringIndex + 1) break; - if ([string characterAtIndex:currentStringIndex+1] != '*') break; + if (CFStringGetCharacterAtIndex(string, currentStringIndex+1) != '*') break; commentEndIndex = [self endIndexOfCommentOfType:SPCStyleComment startingAtIndex:currentStringIndex]; // Remove the comment @@ -122,8 +148,7 @@ TO_BUFFER_STATE to_scan_string (const char *); } } - -/* +/** * Removes quotes surrounding the string if present, and un-escapes internal occurrences of the quote character before returning. */ - (NSString *) unquotedString @@ -135,7 +160,7 @@ TO_BUFFER_STATE to_scan_string (const char *); if (![string length]) return nil; // If the first character is not a quote character, return the entire string. - quoteCharacter = [string characterAtIndex:0]; + quoteCharacter = CFStringGetCharacterAtIndex(string, 0); if (quoteCharacter != '`' && quoteCharacter != '"' && quoteCharacter != '\'') { return [NSString stringWithString:string]; } @@ -164,8 +189,122 @@ TO_BUFFER_STATE to_scan_string (const char *); return returnString; } +/** + * Normalise a string, readying it for queries - trims whitespace from both + * ends, and ensures line endings which aren't in quotes are LF. 
+ */ ++ (NSString *) normaliseQueryForExecution:(NSString *)queryString +{ + NSUInteger stringLength = [queryString length]; + NSCharacterSet *trimCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet]; + + // Check the ends of the string for whitespace, to determine if it needs removing; the trailing scan stops where the leading scan ended, so an all-whitespace string trims to empty instead of underflowing stringLength + NSUInteger whitespaceCharsAtStart = 0; + NSUInteger whitespaceCharsAtEnd = 0; + while (whitespaceCharsAtStart < stringLength && [trimCharset characterIsMember:CFStringGetCharacterAtIndex(queryString, whitespaceCharsAtStart)]) + whitespaceCharsAtStart++; + while (whitespaceCharsAtStart + whitespaceCharsAtEnd < stringLength && [trimCharset characterIsMember:CFStringGetCharacterAtIndex(queryString, stringLength - whitespaceCharsAtEnd - 1)]) + whitespaceCharsAtEnd++; + + // Trim if necessary + if (whitespaceCharsAtStart || whitespaceCharsAtEnd) { + stringLength -= whitespaceCharsAtStart + whitespaceCharsAtEnd; + queryString = [queryString substringWithRange:NSMakeRange(whitespaceCharsAtStart, stringLength)]; + } + + // Check for carriage returns in the string + NSMutableArray *carriageReturnPositions = [NSMutableArray array]; + NSUInteger currentStringIndex, innerStringIndex, i, quotedStringLength; + unichar currentCharacter, innerCharacter; + BOOL characterIsEscaped; + for (currentStringIndex = 0; currentStringIndex < stringLength; currentStringIndex++) { + currentCharacter = CFStringGetCharacterAtIndex(queryString, currentStringIndex); + switch (currentCharacter) { + + // When quote characters are encountered walk to the end of the quoted string. 
+ case '\'': + case '"': + case '`': + for (innerStringIndex = currentStringIndex+1; innerStringIndex < stringLength; innerStringIndex++) { + innerCharacter = CFStringGetCharacterAtIndex(queryString, innerStringIndex); + + // If the string end is a backtick and one has been encountered, treat it as end of string + if (innerCharacter == '`' && currentCharacter == '`') { + + // ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both. + if ((innerStringIndex + 1) < stringLength && CFStringGetCharacterAtIndex(queryString, innerStringIndex+1) == '`') { + innerStringIndex++; + continue; + } + + currentStringIndex = innerStringIndex; + break; + + // Otherwise, prepare to treat the string as ended when meeting the correct boundary character.... + } else if (innerCharacter == currentCharacter) { + + // ...but only if the string end isn't escaped with an *odd* number of escaping characters... + characterIsEscaped = NO; + i = 1; + quotedStringLength = innerStringIndex - 1; + while ((quotedStringLength - i) > 0 && CFStringGetCharacterAtIndex(queryString, innerStringIndex - i) == '\\') { + characterIsEscaped = !characterIsEscaped; + i++; + } -/* + // If an even number have been found, it may be the end of the string - as long as the subsequent character + // isn't also the same character, in which case it's another form of escaping. + if (!characterIsEscaped) { + if ((innerStringIndex + 1) < stringLength && CFStringGetCharacterAtIndex(queryString, innerStringIndex+1) == currentCharacter) { + innerStringIndex++; + continue; + } + + // Really is the end of the string. + currentStringIndex = innerStringIndex; + break; + } + } + } + + // The quoted string has been left open - end processing. 
+ currentStringIndex = innerStringIndex; + break; + + case '\r': + [carriageReturnPositions addObject:[NSNumber numberWithUnsignedInteger:currentStringIndex]]; + break; + } + } + + if ([carriageReturnPositions count]) { + NSMutableString *normalisedString = [NSMutableString stringWithString:queryString]; + BOOL isCRLF; + NSUInteger CRLocation; + for (NSNumber *position in [carriageReturnPositions reverseObjectEnumerator]) { + CRLocation = [position unsignedIntegerValue]; + + // Check whether it's a CRLF or just a CR + isCRLF = NO; + if ([normalisedString length] > CRLocation + 1 && CFStringGetCharacterAtIndex(normalisedString, CRLocation + 1) == '\n') isCRLF = YES; + + // Normalise the line endings; positions are visited last-to-first, so deleting a CR here never shifts a position that is still to be processed + if (isCRLF) { + [normalisedString deleteCharactersInRange:NSMakeRange(CRLocation, 1)]; + } else { + [normalisedString replaceCharactersInRange:NSMakeRange(CRLocation, 1) withString:@"\n"]; + } + } + queryString = normalisedString; + } + + return queryString; +} + +#pragma mark - +#pragma mark Trimming or retrieving strings from the front of the string + +/** * Removes characters from the string up to the first occurrence of the supplied character. */ - (BOOL) trimToCharacter:(unichar)character inclusively:(BOOL)inclusive @@ -173,8 +312,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self trimToCharacter:character inclusively:inclusive ignoringQuotedStrings:YES]; } - -/* +/** * As trimToCharacter: ..., but allows control over whether characters within quoted * strings are ignored. */ @@ -193,8 +331,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return YES; } - -/* +/** * Returns an NSString containing characters from the string up to the first occurrence of the supplied character. 
*/ - (NSString *) stringToCharacter:(unichar)character inclusively:(BOOL)inclusive @@ -202,8 +339,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self stringToCharacter:character inclusively:inclusive ignoringQuotedStrings:YES]; } - -/* +/** * As stringToCharacter: ..., but allows control over whether characters within quoted strings * are ignored. */ @@ -222,8 +358,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [string substringWithRange:NSMakeRange(returnFromPosition, stringIndex + (inclusive?1:0) - returnFromPosition)]; } - -/* +/** * Returns an NSString containing characters from the string up to the first occurrence of the supplied * character, also removing them from the string. */ @@ -232,8 +367,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self trimAndReturnStringToCharacter:character trimmingInclusively:inclusiveTrim returningInclusively:inclusiveReturn ignoringQuotedStrings:YES]; } - -/* +/** * As trimAndReturnStringToCharacter: ..., but allows control over whether characters within quoted * strings are ignored. */ @@ -261,8 +395,10 @@ TO_BUFFER_STATE to_scan_string (const char *); return resultString; } +#pragma mark - +#pragma mark Trimming or retrieving strings from one specified character to another -/* +/** * Returns characters from the string up to and from the first occurrence of the supplied opening character * to the appropriate occurrence of the supplied closing character. "inclusively" controls whether the supplied * characters should also be returned. 
@@ -272,8 +408,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self stringFromCharacter:fromCharacter toCharacter:toCharacter inclusively:inclusive skippingBrackets:NO ignoringQuotedStrings:YES]; } - -/* +/** * As stringFromCharacter: toCharacter: ..., but allows control over whether to skip * over bracket-enclosed characters, as in subqueries, enums, definitions or groups */ @@ -282,8 +417,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self stringFromCharacter:fromCharacter toCharacter:toCharacter inclusively:inclusive skippingBrackets:skipBrackets ignoringQuotedStrings:YES]; } - -/* +/** * As stringFromCharacter: toCharacter: ..., but allows control over whether characters within quoted * strings are ignored. */ @@ -292,8 +426,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self stringFromCharacter:fromCharacter toCharacter:toCharacter inclusively:inclusive skippingBrackets:NO ignoringQuotedStrings:ignoreQuotedStrings]; } - -/* +/** * As stringFromCharacter: toCharacter: ..., but allows control over both bracketing and quoting. */ - (NSString *) stringFromCharacter:(unichar)fromCharacter toCharacter:(unichar)toCharacter inclusively:(BOOL)inclusive skippingBrackets:(BOOL)skipBrackets ignoringQuotedStrings:(BOOL)ignoreQuotedStrings @@ -317,8 +450,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [string substringWithRange:NSMakeRange(fromCharacterIndex + (inclusive?0:1), toCharacterIndex + (inclusive?1:-1) - fromCharacterIndex)]; } - -/* +/** * As stringFromCharacter: toCharacter: ..., but also trims the string up to the "to" character and * up to or including the "from" character, depending on whether "trimmingInclusively" is set. 
*/ @@ -327,8 +459,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self trimAndReturnStringFromCharacter:fromCharacter toCharacter:toCharacter trimmingInclusively:inclusiveTrim returningInclusively:inclusiveReturn skippingBrackets:NO ignoringQuotedStrings:YES]; } - -/* +/** * As trimAndReturnStringFromCharacter: toCharacter: ..., but allows control over whether to * skip over bracket-enclosed characters, as in subqueries, enums, definitions or groups. */ @@ -338,7 +469,7 @@ TO_BUFFER_STATE to_scan_string (const char *); } -/* +/** * As trimAndReturnStringFromCharacter: toCharacter: ..., but allows control over whether characters * within quoted strings are ignored. */ @@ -348,7 +479,7 @@ TO_BUFFER_STATE to_scan_string (const char *); } -/* +/** * As trimAndReturnStringFromCharacter: toCharacter: ..., but allows control over both bracketing * and quoting. */ @@ -376,7 +507,10 @@ TO_BUFFER_STATE to_scan_string (const char *); return resultString; } -/* +#pragma mark - +#pragma mark Splitting strings + +/** * Split a string on the boundaries formed by the supplied character, returning an array of strings. */ - (NSArray *) splitStringByCharacter:(unichar)character @@ -384,7 +518,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self splitStringByCharacter:character skippingBrackets:NO ignoringQuotedStrings:YES]; } -/* +/** * As splitStringByCharacter: ..., but allows control over whether to skip over bracket-enclosed * characters, as in subqueries, enums, definitions or groups. */ @@ -393,8 +527,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self splitStringByCharacter:character skippingBrackets:skipBrackets ignoringQuotedStrings:YES]; } - -/* +/** * As splitStringByCharacter:, but allows control over whether characters * within quoted strings are ignored. 
*/ @@ -403,7 +536,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return [self splitStringByCharacter:character skippingBrackets:NO ignoringQuotedStrings:ignoreQuotedStrings]; } -/* +/** * As splitStringByCharacter: ..., but allows control over both bracketing and quoting. */ - (NSArray *) splitStringByCharacter:(unichar)character skippingBrackets:(BOOL)skipBrackets ignoringQuotedStrings:(BOOL)ignoreQuotedStrings @@ -447,8 +580,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return resultsArray; } - -/* +/** * As splitStringByCharacter:, but returning only the ranges of queries, stored as NSValues. */ - (NSArray *) splitStringIntoRangesByCharacter:(unichar)character @@ -492,25 +624,37 @@ TO_BUFFER_STATE to_scan_string (const char *); return resultsArray; } +#pragma mark - +#pragma mark SQL-aware character lookups (mostly for internal use) -/* - * A method intended for use by the functions above. +/** + * A shortcut method for looking up the first occurrence of a character in + * the string. Brackets aren't processed, quoted strings are processed according + * to the supplied argument, and comments are processed according to the setting on + * the object. */ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character ignoringQuotedStrings:(BOOL)ignoreQuotedStrings { return [self firstOccurrenceOfCharacter:character afterIndex:-1 skippingBrackets:NO ignoringQuotedStrings:ignoreQuotedStrings]; } - -/* - * A method intended for use by the functions above. +/** + * A shortcut method for looking up the first occurrence of a character in + * the string after a specified start index. Brackets aren't processed, quoted + * strings are processed according to the supplied argument, and comments are + * processed according to the setting on the object. 
*/ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteger)startIndex ignoringQuotedStrings:(BOOL)ignoreQuotedStrings { return [self firstOccurrenceOfCharacter:character afterIndex:startIndex skippingBrackets:NO ignoringQuotedStrings:ignoreQuotedStrings]; } - +/** + * Look for the first occurrence of a character, in SQL-aware form - with support + * for skipping bracketed or quoted ranges. + * Comments are also skipped depending on the setting for this object. + * Mostly intended for internal use, but available externally. + */ - (NSUInteger) firstOccurrenceOfCharacter:(unichar)character afterIndex:(NSInteger)startIndex skippingBrackets:(BOOL)skipBrackets ignoringQuotedStrings:(BOOL)ignoreQuotedStrings { NSUInteger currentStringIndex, quotedStringEndIndex; @@ -520,7 +664,7 @@ TO_BUFFER_STATE to_scan_string (const char *); lastMatchIsDelimiter = NO; // Cache frequently used selectors, avoiding dynamic binding overhead - IMP charAtIndex = [self methodForSelector:@selector(charAtIndex:)]; + IMP charAtIndex = [self methodForSelector:@selector(_charAtIndex:)]; IMP endIndex = [self methodForSelector:@selector(endIndexOfStringQuotedByCharacter:startingAtIndex:)]; IMP substringWithRange = [self methodForSelector:@selector(substringWithRange:)]; @@ -529,7 +673,7 @@ TO_BUFFER_STATE to_scan_string (const char *); // Walk along the string, processing characters for (currentStringIndex = startIndex + 1; currentStringIndex < stringLength; currentStringIndex++) { - currentCharacter = (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex); + currentCharacter = (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex); // Check for the ending character, and if it has been found and quoting/brackets is valid, return. 
// If delimiter support is active and a delimiter is set, check for the delimiter @@ -575,8 +719,8 @@ TO_BUFFER_STATE to_scan_string (const char *); // For comments starting "--[\s]", ensure the start syntax is valid before proceeding. case '-': if (stringLength < currentStringIndex + 2) break; - if ((unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+1) != '-') break; - if (![[NSCharacterSet whitespaceCharacterSet] characterIsMember:(unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+2)]) break; + if ((unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+1) != '-') break; + if (![[NSCharacterSet whitespaceCharacterSet] characterIsMember:(unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+2)]) break; currentStringIndex = [self endIndexOfCommentOfType:SPDoubleDashComment startingAtIndex:currentStringIndex]; break; @@ -589,10 +733,15 @@ TO_BUFFER_STATE to_scan_string (const char *); case '/': if(ignoreCommentStrings) break; if (stringLength < currentStringIndex + 1) break; - if ((unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+1) != '*') break; + if ((unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+1) != '*') break; currentStringIndex = [self endIndexOfCommentOfType:SPCStyleComment startingAtIndex:currentStringIndex]; break; + // Capture whether carriage returns are encountered + case '\r': + if (!containsCRs) containsCRs = YES; + break; + // Check for delimiter strings, by first checking letter-by-letter to "deli" for speed (as there's no default // commands which start with it), and then switching to regex for simplicty. 
case 'd': @@ -602,15 +751,15 @@ TO_BUFFER_STATE to_scan_string (const char *); // and that the "d" is the start of a word if (supportDelimiters && stringLength >= currentStringIndex + 11 && (currentStringIndex == 0 - || [[NSCharacterSet whitespaceAndNewlineCharacterSet] characterIsMember:(unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex-1)])) + || [[NSCharacterSet whitespaceAndNewlineCharacterSet] characterIsMember:(unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex-1)])) { - switch((unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+1)) { + switch((unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+1)) { case 'e': case 'E': - switch((unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+2)) { + switch((unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+2)) { case 'l': case 'L': - switch((unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+3)) { + switch((unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+3)) { case 'i': case 'I': if([self isMatchedByRegex:@"^(delimiter[ \\t]+(\\S+))(?=\\s)" @@ -651,8 +800,9 @@ TO_BUFFER_STATE to_scan_string (const char *); return NSNotFound; } - -/* +/** + * Walk along the string and locate the end of a quoted string, taking + * into account the various forms of SQL escaping. * A method intended for use by the functions above. 
*/ - (NSUInteger) endIndexOfStringQuotedByCharacter:(unichar)quoteCharacter startingAtIndex:(NSInteger)index @@ -663,19 +813,19 @@ TO_BUFFER_STATE to_scan_string (const char *); unichar currentCharacter; // Cache the charAtIndex selector, avoiding dynamic binding overhead - IMP charAtIndex = [self methodForSelector:@selector(charAtIndex:)]; + IMP charAtIndex = [self methodForSelector:@selector(_charAtIndex:)]; stringLength = [string length]; // Walk the string looking for the string end for ( currentStringIndex = index; currentStringIndex < stringLength; currentStringIndex++) { - currentCharacter = (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex); + currentCharacter = (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex); // If the string end is a backtick and one has been encountered, treat it as end of string if (quoteCharacter == '`' && currentCharacter == '`') { // ...as long as the next character isn't also a backtick, in which case it's being quoted. Skip both. 
- if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+1) == '`') { + if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+1) == '`') { currentStringIndex++; continue; } @@ -689,7 +839,7 @@ TO_BUFFER_STATE to_scan_string (const char *); characterIsEscaped = NO; i = 1; quotedStringLength = currentStringIndex - 1; - while ((quotedStringLength - i) > 0 && (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex - i) == '\\') { + while ((quotedStringLength - i) > 0 && (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex - i) == '\\') { characterIsEscaped = !characterIsEscaped; i++; } @@ -697,7 +847,7 @@ TO_BUFFER_STATE to_scan_string (const char *); // If an even number have been found, it may be the end of the string - as long as the subsequent character // isn't also the same character, in which case it's another form of escaping. if (!characterIsEscaped) { - if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), currentStringIndex+1) == quoteCharacter) { + if ((currentStringIndex + 1) < stringLength && (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), currentStringIndex+1) == quoteCharacter) { currentStringIndex++; continue; } @@ -711,7 +861,7 @@ TO_BUFFER_STATE to_scan_string (const char *); return NSNotFound; } -/* +/** * A method intended for use by the functions above. 
*/ - (NSUInteger) endIndexOfCommentOfType:(SPCommentType)commentType startingAtIndex:(NSInteger)index @@ -720,7 +870,7 @@ TO_BUFFER_STATE to_scan_string (const char *); unichar currentCharacter; // Cache the charAtIndex selector, avoiding dynamic binding overhead - IMP charAtIndex = [self methodForSelector:@selector(charAtIndex:)]; + IMP charAtIndex = [self methodForSelector:@selector(_charAtIndex:)]; switch (commentType) { @@ -733,7 +883,8 @@ TO_BUFFER_STATE to_scan_string (const char *); case SPHashComment: index++; for ( ; index < stringLength; index++ ) { - currentCharacter = (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), index); + currentCharacter = (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), index); + if (currentCharacter == '\r') containsCRs = YES; if (currentCharacter == '\r' || currentCharacter == '\n') { return index-1; } @@ -745,8 +896,8 @@ TO_BUFFER_STATE to_scan_string (const char *); case SPCStyleComment: index = index+2; for ( ; index < stringLength; index++ ) { - if ((unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), index) == '*') { - if ((stringLength > index + 1) && (unichar)(long)(*charAtIndex)(self, @selector(charAtIndex:), index+1) == '/') { + if ((unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), index) == '*') { + if ((stringLength > index + 1) && (unichar)(long)(*charAtIndex)(self, @selector(_charAtIndex:), index+1) == '/') { return (index+1); } } @@ -757,55 +908,9 @@ TO_BUFFER_STATE to_scan_string (const char *); return (stringLength-1); } -/* - * Provide a method to retrieve a character from the local cache. - * Does no bounds checking on the underlying string, and so is kept - * separate for characterAtIndex:. - */ -- (unichar) charAtIndex:(NSInteger)index -{ - - // If the current cache doesn't include the current character, update it. 
- if (index > charCacheEnd || index < charCacheStart) { - if (charCacheEnd > -1) { - free(stringCharCache); - } - NSUInteger remainingStringLength = [string length] - index; - NSUInteger newcachelength = (CHARACTER_CACHE_LENGTH < remainingStringLength)?CHARACTER_CACHE_LENGTH:remainingStringLength; - stringCharCache = (unichar *)calloc(newcachelength, sizeof(unichar)); - [string getCharacters:stringCharCache range:NSMakeRange(index, newcachelength)]; - charCacheEnd = index + newcachelength - 1; - charCacheStart = index; - } - return stringCharCache[index - charCacheStart]; -} - -/* - * Provide a method to cleat the cache, and use it when updating the string. - */ -- (void) clearCharCache -{ - if (charCacheEnd > -1) { - free(stringCharCache); - } - charCacheEnd = -1; - charCacheStart = 0; - parsedToChar = '\0'; - parsedToPosition = -1; -} -- (void) deleteCharactersInRange:(NSRange)aRange -{ - [super deleteCharactersInRange:aRange]; - [self clearCharCache]; -} -- (void) insertString:(NSString *)aString atIndex:(NSUInteger)anIndex -{ - [super insertString:aString atIndex:anIndex]; - [self clearCharCache]; -} - -/* Required and primitive methods to allow subclassing class cluster */ #pragma mark - +#pragma mark Required and primitive methods to allow subclassing the class cluster + - (id) init { if (self = [super init]) { @@ -883,31 +988,39 @@ TO_BUFFER_STATE to_scan_string (const char *); delimiter = nil; delimiterLengthMinusOne = 0; lastMatchIsDelimiter = NO; - + containsCRs = NO; } - (NSUInteger) length { return [string length]; } - (unichar) characterAtIndex:(NSUInteger)index { - return [string characterAtIndex:index]; + return CFStringGetCharacterAtIndex(string, index); } - (id) description { return [string description]; } - (NSUInteger) replaceOccurrencesOfString:(NSString *)target withString:(NSString *)replacement options:(NSUInteger)options range:(NSRange)searchRange { return [string replaceOccurrencesOfString:target withString:replacement options:options 
range:searchRange]; - [self clearCharCache]; + [self _clearCharCache]; } - (void) setString:(NSString *)aString { [string setString:aString]; delimiter = nil; delimiterLengthMinusOne = 0; lastMatchIsDelimiter = NO; - [self clearCharCache]; + [self _clearCharCache]; } - (void) replaceCharactersInRange:(NSRange)range withString:(NSString *)aString { [string replaceCharactersInRange:range withString:aString]; - [self clearCharCache]; + [self _clearCharCache]; +} +- (void) deleteCharactersInRange:(NSRange)aRange { + [super deleteCharactersInRange:aRange]; + [self _clearCharCache]; +} +- (void) insertString:(NSString *)aString atIndex:(NSUInteger)anIndex { + [super insertString:aString atIndex:anIndex]; + [self _clearCharCache]; } - (void) dealloc { [string release]; @@ -916,3 +1029,47 @@ TO_BUFFER_STATE to_scan_string (const char *); } @end + +#pragma mark - + +@implementation SPSQLParser (PrivateAPI) + +/** + * Provide a method to retrieve a character from the local cache. + * Does no bounds checking on the underlying string, and so is kept + * separate from characterAtIndex:. + */ +- (unichar) _charAtIndex:(NSInteger)index +{ + + // If the current cache doesn't include the current character, update it. + if (index > charCacheEnd || index < charCacheStart) { + if (charCacheEnd > -1) { + free(stringCharCache); + } + NSUInteger remainingStringLength = [string length] - index; + NSUInteger newcachelength = (CHARACTER_CACHE_LENGTH < remainingStringLength)?CHARACTER_CACHE_LENGTH:remainingStringLength; + stringCharCache = (unichar *)calloc(newcachelength, sizeof(unichar)); + CFStringGetCharacters(string, CFRangeMake(index, newcachelength), stringCharCache); + charCacheEnd = index + newcachelength - 1; + charCacheStart = index; + } + return stringCharCache[index - charCacheStart]; +} + +/** + * Provide a method to clear the cache, which should be used whenever + * the underlying string is updated. 
+ */ +- (void) _clearCharCache +{ + if (charCacheEnd > -1) { + free(stringCharCache); + } + charCacheEnd = -1; + charCacheStart = 0; + parsedToChar = '\0'; + parsedToPosition = -1; +} + +@end
\ No newline at end of file |