diff options
Diffstat (limited to 'SBJsonTokeniser.m')
-rwxr-xr-x | SBJsonTokeniser.m | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/SBJsonTokeniser.m b/SBJsonTokeniser.m new file mode 100755 index 0000000..48aea2a --- /dev/null +++ b/SBJsonTokeniser.m @@ -0,0 +1,454 @@ +/* + Copyright (c) 2010-2011, Stig Brautaset. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + Neither the name of the the author nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#import "SBJsonTokeniser.h" +#import "SBJsonUTF8Stream.h" + +#define SBStringIsIllegalSurrogateHighCharacter(x) (((x) >= 0xd800) && ((x) <= 0xdfff)) + + +@implementation SBJsonTokeniser + +@synthesize error = _error; + +- (id)init { + self = [super init]; + if (self) { + _stream = [[SBJsonUTF8Stream alloc] init]; + + } + + return self; +} + +- (void)dealloc { + [_stream release]; + [super dealloc]; +} + +- (void)appendData:(NSData *)data_ { + [_stream appendData:data_]; +} + + +- (sbjson_token_t)match:(const char *)pattern length:(NSUInteger)len retval:(sbjson_token_t)token { + if (![_stream haveRemainingCharacters:len]) + return sbjson_token_eof; + + if ([_stream skipCharacters:pattern length:len]) + return token; + + self.error = [NSString stringWithFormat:@"Expected '%s' after initial '%.1s'", pattern, pattern]; + return sbjson_token_error; +} + +- (BOOL)decodeEscape:(unichar)ch into:(unichar*)decoded { + switch (ch) { + case '\\': + case '/': + case '"': + *decoded = ch; + break; + + case 'b': + *decoded = '\b'; + break; + + case 'n': + *decoded = '\n'; + break; + + case 'r': + *decoded = '\r'; + break; + + case 't': + *decoded = '\t'; + break; + + case 'f': + *decoded = '\f'; + break; + + default: + self.error = @"Illegal escape character"; + return NO; + break; + } + return YES; +} + +- (BOOL)decodeHexQuad:(unichar*)quad { + unichar c, tmp = 0; + + for (int i = 0; i < 4; i++) { + (void)[_stream getNextUnichar:&c]; + tmp *= 16; + switch (c) { + case '0' ... '9': + tmp += c - '0'; + break; + + case 'a' ... 'f': + tmp += 10 + c - 'a'; + break; + + case 'A' ... 'F': + tmp += 10 + c - 'A'; + break; + + default: + return NO; + } + } + *quad = tmp; + return YES; +} + +- (sbjson_token_t)getStringToken:(NSObject**)token { + NSMutableString *acc = nil; + + for (;;) { + [_stream skip]; + + unichar ch; + { + NSMutableString *string = nil; + if (![_stream getSimpleString:&string]) + return sbjson_token_eof; + + if (!string) { + self.error = @"Broken Unicode encoding"; + return sbjson_token_error; + } + + + if (![_stream getUnichar:&ch]) + return sbjson_token_eof; + + if (acc) { + [acc appendString:string]; + + } else if (ch == '"') { + *token = string; + [_stream skip]; + return sbjson_token_string; + + } else { + acc = [[string mutableCopy] autorelease]; + } + } + + switch (ch) { + case 0 ... 0x1F: + self.error = [NSString stringWithFormat:@"Unescaped control character [0x%0.2X]", (int)ch]; + return sbjson_token_error; + break; + + case '"': + *token = acc; + [_stream skip]; + return sbjson_token_string; + break; + + case '\\': + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + + if (ch == 'u') { + if (![_stream haveRemainingCharacters:5]) + return sbjson_token_eof; + + unichar hi; + if (![self decodeHexQuad:&hi]) { + self.error = @"Invalid hex quad"; + return sbjson_token_error; + } + + if (CFStringIsSurrogateHighCharacter(hi)) { + unichar lo; + + if (![_stream haveRemainingCharacters:6]) + return sbjson_token_eof; + + (void)[_stream getNextUnichar:&ch]; + (void)[_stream getNextUnichar:&lo]; + if (ch != '\\' || lo != 'u' || ![self decodeHexQuad:&lo]) { + self.error = @"Missing low character in surrogate pair"; + return sbjson_token_error; + } + + if (!CFStringIsSurrogateLowCharacter(lo)) { + self.error = @"Invalid low character in surrogate pair"; + return sbjson_token_error; + } + + unichar pair[2] = {hi, lo}; + CFStringAppendCharacters((CFMutableStringRef)acc, pair, 2); + } else if (SBStringIsIllegalSurrogateHighCharacter(hi)) { + self.error = @"Invalid high character in surrogate pair"; + return sbjson_token_error; + } else { + CFStringAppendCharacters((CFMutableStringRef)acc, &hi, 1); + } + + + } else { + unichar decoded; + if (![self decodeEscape:ch into:&decoded]) + return sbjson_token_error; + CFStringAppendCharacters((CFMutableStringRef)acc, &decoded, 1); + } + + break; + + default: { + self.error = [NSString stringWithFormat:@"Invalid UTF-8: '%x'", (int)ch]; + return sbjson_token_error; + break; + } + } + } + return sbjson_token_eof; +} + +- (sbjson_token_t)getNumberToken:(NSObject**)token { + + NSUInteger numberStart = _stream.index; + NSCharacterSet *digits = [NSCharacterSet decimalDigitCharacterSet]; + + unichar ch; + if (![_stream getUnichar:&ch]) + return sbjson_token_eof; + + BOOL isNegative = NO; + if (ch == '-') { + isNegative = YES; + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + } + + if (ch == '0') { + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + + if ([digits characterIsMember:ch]) { + self.error = @"Leading zero is illegal in number"; + return sbjson_token_error; + } + } + + unsigned long long mantissa = 0; + int mantissa_length = 0; + + while ([digits characterIsMember:ch]) { + mantissa *= 10; + mantissa += (ch - '0'); + mantissa_length++; + + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + } + + short exponent = 0; + BOOL isFloat = NO; + + if (ch == '.') { + isFloat = YES; + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + + while ([digits characterIsMember:ch]) { + mantissa *= 10; + mantissa += (ch - '0'); + mantissa_length++; + exponent--; + + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + } + + if (!exponent) { + self.error = @"No digits after decimal point"; + return sbjson_token_error; + } + } + + BOOL hasExponent = NO; + if (ch == 'e' || ch == 'E') { + hasExponent = YES; + + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + + BOOL expIsNegative = NO; + if (ch == '-') { + expIsNegative = YES; + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + + } else if (ch == '+') { + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + } + + short exp = 0; + short exp_length = 0; + while ([digits characterIsMember:ch]) { + exp *= 10; + exp += (ch - '0'); + exp_length++; + + if (![_stream getNextUnichar:&ch]) + return sbjson_token_eof; + } + + if (exp_length == 0) { + self.error = @"No digits in exponent"; + return sbjson_token_error; + } + + if (expIsNegative) + exponent -= exp; + else + exponent += exp; + } + + if (!mantissa_length && isNegative) { + self.error = @"No digits after initial minus"; + return sbjson_token_error; + + } else if (mantissa_length >= 19) { + + NSString *number = [_stream stringWithRange:NSMakeRange(numberStart, _stream.index - numberStart)]; + *token = [NSDecimalNumber decimalNumberWithString:number]; + + } else if (!isFloat && !hasExponent) { + if (!isNegative) + *token = [NSNumber numberWithUnsignedLongLong:mantissa]; + else + *token = [NSNumber numberWithLongLong:-mantissa]; + } else { + *token = [NSDecimalNumber decimalNumberWithMantissa:mantissa + exponent:exponent + isNegative:isNegative]; + } + + return sbjson_token_number; +} + +- (sbjson_token_t)getToken:(NSObject **)token { + + [_stream skipWhitespace]; + + unichar ch; + if (![_stream getUnichar:&ch]) + return sbjson_token_eof; + + NSUInteger oldIndexLocation = _stream.index; + sbjson_token_t tok; + + switch (ch) { + case '[': + tok = sbjson_token_array_start; + [_stream skip]; + break; + + case ']': + tok = sbjson_token_array_end; + [_stream skip]; + break; + + case '{': + tok = sbjson_token_object_start; + [_stream skip]; + break; + + case ':': + tok = sbjson_token_keyval_separator; + [_stream skip]; + break; + + case '}': + tok = sbjson_token_object_end; + [_stream skip]; + break; + + case ',': + tok = sbjson_token_separator; + [_stream skip]; + break; + + case 'n': + tok = [self match:"null" length:4 retval:sbjson_token_null]; + break; + + case 't': + tok = [self match:"true" length:4 retval:sbjson_token_true]; + break; + + case 'f': + tok = [self match:"false" length:5 retval:sbjson_token_false]; + break; + + case '"': + tok = [self getStringToken:token]; + break; + + case '0' ... '9': + case '-': + tok = [self getNumberToken:token]; + break; + + case '+': + self.error = @"Leading + is illegal in number"; + tok = sbjson_token_error; + break; + + default: + self.error = [NSString stringWithFormat:@"Illegal start of token [%c]", ch]; + tok = sbjson_token_error; + break; + } + + if (tok == sbjson_token_eof) { + // We ran out of bytes in the middle of a token. + // We don't know how to restart in mid-flight, so + // rewind to the start of the token for next attempt. + // Hopefully we'll have more data then. + _stream.index = oldIndexLocation; + } + + return tok; +} + + +@end |