aboutsummaryrefslogtreecommitdiffstats
path: root/Source/SPJSONFormatter.m
diff options
context:
space:
mode:
Diffstat (limited to 'Source/SPJSONFormatter.m')
-rw-r--r--Source/SPJSONFormatter.m364
1 files changed, 364 insertions, 0 deletions
diff --git a/Source/SPJSONFormatter.m b/Source/SPJSONFormatter.m
new file mode 100644
index 00000000..05cc2992
--- /dev/null
+++ b/Source/SPJSONFormatter.m
@@ -0,0 +1,364 @@
+//
+// SPJSONFormatter.m
+// sequel-pro
+//
+// Created by Max Lohrmann on 10.02.17.
+// Copyright (c) 2017 Max Lohrmann. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// More info at <https://github.com/sequelpro/sequelpro>
+
+#import "SPJSONFormatter.h"
+
+
+static char GetNextANSIChar(SPJSONTokenizerState *stateInfo); // file-private byte reader used by the tokenizer; defined further down in this file
+
+
+@implementation SPJSONFormatter
+
+/**
+ * Appends the textual representation of a single token to target.
+ *
+ * Punctuation tokens are emitted from constant strings so that no NSString has to be
+ * created from the input for them. Every other token (JSON_TOK_OTHER, JSON_TOK_STRINGDATA)
+ * is copied from the tokenizer's input buffer; everything except string data has its
+ * surrounding whitespace/newlines removed first.
+ *
+ * @param target    The string that receives the token text.
+ * @param stateInfo The tokenizer state the token was read from (provides the byte buffer).
+ * @param token     The token to append.
+ * @param commaText The text to emit for JSON_TOK_COMMA (differs between formatting and unformatting).
+ * @param trimSet   The characters to strip from the ends of non-string tokens.
+ *
+ * @return YES on success, NO if the token's bytes could not be turned into a string.
+ */
+static BOOL SPJSONAppendTokenText(NSMutableString *target, SPJSONTokenizerState *stateInfo, SPJSONTokenInfo *token, NSString *commaText, NSCharacterSet *trimSet)
+{
+    switch (token->tok) {
+        case JSON_TOK_CURLY_BRACE_OPEN:
+            [target appendString:@"{"];
+            return YES;
+
+        case JSON_TOK_CURLY_BRACE_CLOSE:
+            [target appendString:@"}"];
+            return YES;
+
+        case JSON_TOK_SQUARE_BRACE_OPEN:
+            [target appendString:@"["];
+            return YES;
+
+        case JSON_TOK_SQUARE_BRACE_CLOSE:
+            [target appendString:@"]"];
+            return YES;
+
+        case JSON_TOK_DOUBLE_QUOTE:
+            [target appendString:@"\""];
+            return YES;
+
+        case JSON_TOK_COLON:
+            [target appendString:@": "]; //add a space after ":" (readability / matches MySQL)
+            return YES;
+
+        case JSON_TOK_COMMA:
+            [target appendString:commaText];
+            return YES;
+
+        //JSON_TOK_OTHER
+        //JSON_TOK_STRINGDATA
+        default:
+            break;
+    }
+
+    //the slice only references the tokenizer's buffer, it does not own or copy it
+    NSString *slice = [[NSString alloc] initWithBytesNoCopy:(void *)(&stateInfo->str[token->pos]) length:token->len encoding:NSUTF8StringEncoding freeWhenDone:NO];
+    //the initializer is failable and -appendString: raises on nil, so bail out instead
+    if(!slice) return NO;
+
+    if(token->tok == JSON_TOK_STRINGDATA) {
+        //string contents are significant and must be kept byte for byte
+        [target appendString:slice];
+    }
+    else {
+        //for everything except strings get rid of surrounding whitespace
+        [target appendString:[slice stringByTrimmingCharactersInSet:trimSet]];
+    }
+
+    [slice release];
+    return YES;
+}
+
+/**
+ * Returns a pretty-printed version of a JSON string: one element per line, nested
+ * structures indented with one tab per level and a space after every ":".
+ *
+ * The input is only tokenized, not validated, so malformed JSON is formatted on a
+ * best-effort basis.
+ *
+ * @param input The JSON text to format.
+ *
+ * @return The formatted string (autoreleased) or nil if the input could not be tokenized.
+ */
++ (NSString *)stringByFormattingString:(NSString *)input
+{
+    SPJSONTokenizerState stateInfo;
+    if(SPJSONTokenizerInit(input,&stateInfo) == -1) return nil;
+
+    NSUInteger idLevel = 0;
+
+    NSCharacterSet *wsNlCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet];
+    NSMutableString *formatted = [[NSMutableString alloc] init];
+
+    SPJSONToken prevTokenType = JSON_TOK_EOF;
+    SPJSONTokenInfo curToken;
+    if(SPJSONTokenizerGetNextToken(&stateInfo,&curToken) == -1) {
+        [formatted release];
+        return nil;
+    }
+
+    BOOL needIndent = NO;
+    SPJSONTokenInfo nextToken;
+    do {
+        //we need to know the next token to do meaningful formatting
+        if(SPJSONTokenizerGetNextToken(&stateInfo,&nextToken) == -1) {
+            [formatted release];
+            return nil;
+        }
+
+        BOOL isOpening = (curToken.tok == JSON_TOK_SQUARE_BRACE_OPEN || curToken.tok == JSON_TOK_CURLY_BRACE_OPEN);
+        BOOL isClosing = (curToken.tok == JSON_TOK_SQUARE_BRACE_CLOSE || curToken.tok == JSON_TOK_CURLY_BRACE_CLOSE);
+
+        //idLevel is unsigned: an unbalanced "]" or "}" must not wrap it around to a huge
+        //value, or the indentation loop below would append tabs practically forever
+        if(isClosing && idLevel > 0)
+            idLevel--;
+
+        //if this token is a "]" or "}" and there was no "[" or "{" directly before it, add a linebreak before
+        if(isClosing && prevTokenType != JSON_TOK_CURLY_BRACE_OPEN && prevTokenType != JSON_TOK_SQUARE_BRACE_OPEN) {
+            [formatted appendString:@"\n"];
+            needIndent = YES;
+        }
+
+        //if this token is on a new line indent it
+        if(needIndent && idLevel > 0) {
+            //32 tabs pool (with fallback for even deeper nesting)
+            static NSString *tabs = @"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
+            NSUInteger myIdLevel = idLevel;
+            while(myIdLevel > [tabs length]) {
+                [formatted appendString:tabs];
+                myIdLevel -= [tabs length];
+            }
+            [formatted appendString:[tabs substringWithRange:NSMakeRange(0, myIdLevel)]];
+            needIndent = NO;
+        }
+
+        if(!SPJSONAppendTokenText(formatted, &stateInfo, &curToken, @",", wsNlCharset)) {
+            [formatted release];
+            return nil;
+        }
+
+        //if the current token is a "[", "{" or "," and the next token is not a "]" or "}" add a line break afterwards
+        if(
+            curToken.tok == JSON_TOK_COMMA ||
+            (curToken.tok == JSON_TOK_CURLY_BRACE_OPEN && nextToken.tok != JSON_TOK_CURLY_BRACE_CLOSE) ||
+            (curToken.tok == JSON_TOK_SQUARE_BRACE_OPEN && nextToken.tok != JSON_TOK_SQUARE_BRACE_CLOSE)
+        ) {
+            [formatted appendString:@"\n"];
+            needIndent = YES;
+        }
+
+        if(isOpening)
+            idLevel++;
+
+        prevTokenType = curToken.tok;
+        curToken = nextToken;
+    } while(curToken.tok != JSON_TOK_EOF); //SPJSONTokenizerGetNextToken() will always return JSON_TOK_EOF once it has reached that state
+
+    return [formatted autorelease];
+}
+
+/**
+ * Returns a single-line version of a JSON string: whitespace between tokens is
+ * removed and a single space is placed after every ":" and "," (to match MySQL).
+ * The contents of JSON strings are left untouched.
+ *
+ * The input is only tokenized, not validated.
+ *
+ * @param input The JSON text to compact.
+ *
+ * @return The unformatted string (autoreleased) or nil if the input could not be tokenized.
+ */
++ (NSString *)stringByUnformattingString:(NSString *)input
+{
+    SPJSONTokenizerState stateInfo;
+    if(SPJSONTokenizerInit(input,&stateInfo) == -1) return nil;
+
+    NSCharacterSet *wsNlCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet];
+    NSMutableString *unformatted = [[NSMutableString alloc] init];
+
+    do {
+        SPJSONTokenInfo curToken;
+        if(SPJSONTokenizerGetNextToken(&stateInfo,&curToken) == -1) {
+            [unformatted release];
+            return nil;
+        }
+
+        if(curToken.tok == JSON_TOK_EOF) break;
+
+        //add a space after "," to match MySQL
+        if(!SPJSONAppendTokenText(unformatted, &stateInfo, &curToken, @", ", wsNlCharset)) {
+            [unformatted release];
+            return nil;
+        }
+
+    } while(1);
+
+    return [unformatted autorelease];
+}
+
+
+@end
+
+/**
+ * Reads forward from the current position of the input and returns the first single byte
+ * (7 bit) character encountered, leaving the read pointer directly behind that character.
+ * Bytes that belong to a UTF8 multibyte sequence (i.e. have their most significant bit set)
+ * are consumed and skipped, because every JSON control character lives in the single byte range.
+ *
+ * stateInfo MUST be valid or this will crash!
+ *
+ * @return Either a char in the range 0-127 or -1 if the end of the input was reached first.
+ */
+char GetNextANSIChar(SPJSONTokenizerState *stateInfo) {
+    while(stateInfo->pos < stateInfo->len) {
+        char current = stateInfo->str[stateInfo->pos++];
+        // a cleared high bit means this is a plain single byte character
+        if((current & 0x80) == 0)
+            return current;
+    }
+    // ran out of input (the read pointer is not advanced any further)
+    return -1;
+}
+
+/**
+ * Prepares a tokenizer state for reading tokens from the given string.
+ *
+ * The state only stores the pointer returned by -[NSString UTF8String]; it does not copy
+ * the bytes. The state must therefore not be used for longer than that pointer stays valid
+ * (see the NSString documentation for its lifetime).
+ *
+ * @param input     The string to tokenize.
+ * @param stateInfo The state structure to initialize. It is left untouched on failure.
+ *
+ * @return 0 on success, -1 if an argument is invalid or no UTF8 representation of input is available.
+ */
+int SPJSONTokenizerInit(NSString *input, SPJSONTokenizerState *stateInfo) {
+    if(!input || ![input respondsToSelector:@selector(UTF8String)] || stateInfo == NULL)
+        return -1;
+
+    // -UTF8String can return NULL when the string cannot be converted; passing that to strlen() would crash
+    const char *utf8 = [input UTF8String];
+    if(utf8 == NULL)
+        return -1;
+
+    stateInfo->ctxt = JSON_ROOT_CONTEXT;
+    stateInfo->pos = 0;
+    stateInfo->str = utf8;
+    stateInfo->len = strlen(utf8); //we deem a non-NULL -[NSString UTF8String] to be a safe (NUL terminated) source
+
+    return 0;
+}
+
+/**
+ * Reads the next token from the input described by stateInfo and advances the state.
+ *
+ * The tokenizer does not validate JSON. It only distinguishes the structural characters
+ * ( { } [ ] : , " ), the contents of double quoted strings (JSON_TOK_STRINGDATA) and
+ * everything else between structural characters (JSON_TOK_OTHER, including whitespace).
+ *
+ * @param stateInfo  The tokenizer state, as set up by SPJSONTokenizerInit().
+ * @param tokenMatch Receives the type of the token and its byte position/length in the input.
+ *
+ * @return 1 if a token was found, 0 if the end of input was reached (tokenMatch is set to
+ *         JSON_TOK_EOF) or -1 if an argument is NULL or the state has no input string.
+ */
+int SPJSONTokenizerGetNextToken(SPJSONTokenizerState *stateInfo, SPJSONTokenInfo *tokenMatch) {
+    if(tokenMatch == NULL || stateInfo == NULL || stateInfo->str == NULL)
+        return -1;
+
+    size_t posBefore = stateInfo->pos;
+    do {
+        // NOTE: comparing c against -1 relies on plain char being signed on the target platform
+        char c = GetNextANSIChar(stateInfo);
+        if(stateInfo->ctxt == JSON_STRING_CONTEXT) {
+            //the only characters inside a string that are relevant to us are backslash and doublequote
+            if(c == '"' || c == -1) {
+                //a closing quote has already been consumed (pos points behind it), EOF consumes nothing
+                size_t delimiterLen = (c == '"') ? 1 : 0;
+                //if the string has contents, return that first
+                if((stateInfo->pos - posBefore) > delimiterLen) {
+                    tokenMatch->tok = JSON_TOK_STRINGDATA;
+                    tokenMatch->pos = posBefore;
+                    stateInfo->pos -= delimiterLen; //rewind to read the quote again
+                    tokenMatch->len = stateInfo->pos - posBefore;
+                    return 1;
+                }
+                //string is terminated by EOF (invalid JSON)
+                if(c == -1) {
+                    //switch to root context and try again to reach EOF branch below
+                    stateInfo->ctxt = JSON_ROOT_CONTEXT;
+                    continue;
+                }
+                //this is the closing quote of the string
+                stateInfo->ctxt = JSON_ROOT_CONTEXT;
+                tokenMatch->tok = JSON_TOK_DOUBLE_QUOTE;
+                tokenMatch->pos = posBefore;
+                tokenMatch->len = stateInfo->pos - posBefore;
+                return 1;
+            }
+            else if(c == '\\') {
+                //for backslash we need to skip the next byte
+                // We don't care for the value of the next byte since we don't really want to parse JSON, but only format it.
+                // Thus we only have to pay attention to differentiate backslash-dquote and dquote.
+                // A backslash as the very last byte has nothing to skip; moving past the end would
+                // make the returned string data include the terminating NUL byte.
+                if(stateInfo->pos < stateInfo->len)
+                    stateInfo->pos++;
+            }
+        }
+        else if(c == -1) {
+            //if there is still unreturned input, return that first
+            if(posBefore < stateInfo->len) {
+                tokenMatch->tok = JSON_TOK_OTHER;
+                tokenMatch->pos = posBefore;
+                tokenMatch->len = stateInfo->pos - posBefore;
+                return 1;
+            }
+            tokenMatch->tok = JSON_TOK_EOF;
+            tokenMatch->pos = stateInfo->pos; //EOF sits after the last character
+            tokenMatch->len = 0; // EOF has no length
+            return 0;
+        }
+        else {
+            //JSON_TOK_EOF doubles as "no structural character found" here
+            SPJSONToken tokFound = JSON_TOK_EOF;
+
+            switch(c) {
+                case '"':
+                    stateInfo->ctxt = JSON_STRING_CONTEXT;
+                    tokFound = JSON_TOK_DOUBLE_QUOTE;
+                    break;
+
+                case '{':
+                    tokFound = JSON_TOK_CURLY_BRACE_OPEN;
+                    break;
+
+                case '}':
+                    tokFound = JSON_TOK_CURLY_BRACE_CLOSE;
+                    break;
+
+                case '[':
+                    tokFound = JSON_TOK_SQUARE_BRACE_OPEN;
+                    break;
+
+                case ']':
+                    tokFound = JSON_TOK_SQUARE_BRACE_CLOSE;
+                    break;
+
+                case ':':
+                    tokFound = JSON_TOK_COLON;
+                    break;
+
+                case ',':
+                    tokFound = JSON_TOK_COMMA;
+                    break;
+            }
+
+            //if we found a token, but had to walk more than 1 char there was something else
+            //between the previous token and this token, which we should report first
+            if(tokFound != JSON_TOK_EOF && (stateInfo->pos - posBefore) > 1) {
+                stateInfo->ctxt = JSON_ROOT_CONTEXT; //undo a possible switch to string context, the quote will be read again
+                stateInfo->pos--; //rewind so we will read the token again next time
+                tokFound = JSON_TOK_OTHER;
+            }
+
+            if(tokFound != JSON_TOK_EOF) {
+                tokenMatch->tok = tokFound;
+                tokenMatch->pos = posBefore;
+                tokenMatch->len = stateInfo->pos - posBefore;
+                return 1;
+            }
+        }
+    } while(1);
+}