//
//  SPJSONFormatter.m
//  sequel-pro
//
//  Created by Max Lohrmann on 10.02.17.
//  Copyright (c) 2017 Max Lohrmann. All rights reserved.
//
//  Permission is hereby granted, free of charge, to any person
//  obtaining a copy of this software and associated documentation
//  files (the "Software"), to deal in the Software without
//  restriction, including without limitation the rights to use,
//  copy, modify, merge, publish, distribute, sublicense, and/or sell
//  copies of the Software, and to permit persons to whom the
//  Software is furnished to do so, subject to the following
//  conditions:
//
//  The above copyright notice and this permission notice shall be
//  included in all copies or substantial portions of the Software.
//
//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
//  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
//  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
//  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
//  OTHER DEALINGS IN THE SOFTWARE.
//
//  More info at <https://github.com/sequelpro/sequelpro>

#import "SPJSONFormatter.h"


// Forward declaration of a file-private (static) helper that is defined further down in this file.
// It hands out the next single-byte character of the tokenizer input and is used by SPJSONTokenizerGetNextToken().
static char GetNextANSIChar(SPJSONTokenizerState *stateInfo);


@implementation SPJSONFormatter

/**
 * Appends the textual representation of a single token to output.
 *
 * Structural tokens are written from constant strings, so no NSString has to be created from the input for them.
 * All remaining tokens (JSON_TOK_OTHER, JSON_TOK_STRINGDATA) are decoded from the tokenizer's UTF-8 buffer;
 * everything except string data additionally gets surrounding whitespace removed.
 *
 * @param output    The string to append to.
 * @param stateInfo The tokenizer state the token was read from (provides the byte buffer).
 * @param token     The token to write.
 * @param commaText The text to emit for a "," token (the two public methods differ here).
 * @param trimSet   The characters to strip from both ends of non-string tokens.
 *
 * @return YES if the token was appended, NO if its bytes could not be turned into an NSString
 *         (nothing is appended in that case).
 */
static BOOL SPJSONAppendTokenText(NSMutableString *output, const SPJSONTokenizerState *stateInfo, const SPJSONTokenInfo *token, NSString *commaText, NSCharacterSet *trimSet)
{
	NSString *literal = nil;
	switch (token->tok) {
		case JSON_TOK_CURLY_BRACE_OPEN:   literal = @"{";       break;
		case JSON_TOK_CURLY_BRACE_CLOSE:  literal = @"}";       break;
		case JSON_TOK_SQUARE_BRACE_OPEN:  literal = @"[";       break;
		case JSON_TOK_SQUARE_BRACE_CLOSE: literal = @"]";       break;
		case JSON_TOK_DOUBLE_QUOTE:       literal = @"\"";      break;
		case JSON_TOK_COLON:              literal = @": ";      break; //add a space after ":" for readability / to match MySQL
		case JSON_TOK_COMMA:              literal = commaText;  break;
		//JSON_TOK_OTHER
		//JSON_TOK_STRINGDATA
		default: break;
	}
	
	if(literal) {
		[output appendString:literal];
		return YES;
	}
	
	//the bytes are only borrowed from the tokenizer buffer, they are neither copied nor freed by this string
	NSString *raw = [[NSString alloc] initWithBytesNoCopy:(void *)(&stateInfo->str[token->pos]) length:token->len encoding:NSUTF8StringEncoding freeWhenDone:NO];
	//-appendString: raises an exception for nil, so report the failure to the caller instead
	if(!raw) return NO;
	
	if(token->tok == JSON_TOK_STRINGDATA) {
		[output appendString:raw];
	}
	else {
		//for everything except strings get rid of surrounding whitespace
		[output appendString:[raw stringByTrimmingCharactersInSet:trimSet]];
	}
	
	[raw release];
	return YES;
}

/**
 * Appends one tab character per indentation level to output.
 *
 * @param output The string to append to.
 * @param level  The number of tabs to append.
 */
static void SPJSONAppendIndentation(NSMutableString *output, NSUInteger level)
{
	//32 tabs pool (with fallback for even deeper nesting)
	static NSString * const tabPool = @"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
	NSUInteger poolSize = [tabPool length];
	
	while(level > poolSize) {
		[output appendString:tabPool];
		level -= poolSize;
	}
	[output appendString:[tabPool substringWithRange:NSMakeRange(0, level)]];
}

/**
 * Returns a multi-line, tab-indented representation of the JSON text in input.
 *
 * A line break is inserted after every "," and after every "[" or "{" that is not directly followed by its closing
 * counterpart, as well as before every "]" or "}" that does not directly follow a "[", "{" or ",".
 * Lines are indented with one tab per open "[" / "{". The input is only tokenized, not validated.
 *
 * @param input The JSON text to format.
 *
 * @return The formatted string (autoreleased) or nil if the tokenizer could not be set up,
 *         reported an error or a token could not be decoded.
 */
+ (NSString *)stringByFormattingString:(NSString *)input
{
	SPJSONTokenizerState stateInfo;
	if(SPJSONTokenizerInit(input,&stateInfo) == -1) return nil;
	
	SPJSONTokenInfo curToken;
	if(SPJSONTokenizerGetNextToken(&stateInfo,&curToken) == -1) return nil;
	
	NSCharacterSet *wsNlCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet];
	NSMutableString *formatted = [[NSMutableString alloc] init];
	
	NSUInteger idLevel = 0;
	BOOL needIndent = NO;
	SPJSONToken prevTokenType = JSON_TOK_EOF;
	
	//SPJSONTokenizerGetNextToken() will always return JSON_TOK_EOF once it has reached that state
	while(curToken.tok != JSON_TOK_EOF) {
		//we need to know the next token to do meaningful formatting
		SPJSONTokenInfo nextToken;
		if(SPJSONTokenizerGetNextToken(&stateInfo,&nextToken) == -1) {
			[formatted release];
			return nil;
		}
		
		BOOL isOpening = (curToken.tok == JSON_TOK_CURLY_BRACE_OPEN || curToken.tok == JSON_TOK_SQUARE_BRACE_OPEN);
		BOOL isClosing = (curToken.tok == JSON_TOK_CURLY_BRACE_CLOSE || curToken.tok == JSON_TOK_SQUARE_BRACE_CLOSE);
		
		//a closing brace already belongs to the outer level (never go below 0 for unbalanced input)
		if(isClosing && idLevel > 0) idLevel--;
		
		//if this token is a "]" or "}" and there was no ",", "[" or "{" directly before it, add a linebreak before
		if(isClosing && prevTokenType != JSON_TOK_CURLY_BRACE_OPEN && prevTokenType != JSON_TOK_SQUARE_BRACE_OPEN && prevTokenType != JSON_TOK_COMMA) {
			[formatted appendString:@"\n"];
			needIndent = YES;
		}
		
		//if this token is on a new line indent it
		if(needIndent && idLevel > 0) {
			SPJSONAppendIndentation(formatted, idLevel);
			needIndent = NO;
		}
		
		if(!SPJSONAppendTokenText(formatted, &stateInfo, &curToken, @",", wsNlCharset)) {
			[formatted release];
			return nil;
		}
		
		//if the current token is a "[", "{" or "," and the next token is not a "]" or "}" add a line break afterwards
		if(
		   curToken.tok == JSON_TOK_COMMA ||
		   (curToken.tok == JSON_TOK_CURLY_BRACE_OPEN && nextToken.tok != JSON_TOK_CURLY_BRACE_CLOSE) ||
		   (curToken.tok == JSON_TOK_SQUARE_BRACE_OPEN && nextToken.tok != JSON_TOK_SQUARE_BRACE_CLOSE)
		) {
			[formatted appendString:@"\n"];
			needIndent = YES;
		}
		
		if(isOpening) idLevel++;
		
		prevTokenType = curToken.tok;
		curToken = nextToken;
	}
	
	return [formatted autorelease];
}

/**
 * Returns a single-line representation of the JSON text in input.
 *
 * Whitespace around everything except string contents is dropped; a single space is emitted after
 * every ":" and "," (to match MySQL). The input is only tokenized, not validated.
 *
 * @param input The JSON text to unformat.
 *
 * @return The unformatted string (autoreleased) or nil if the tokenizer could not be set up,
 *         reported an error or a token could not be decoded.
 */
+ (NSString *)stringByUnformattingString:(NSString *)input
{
	SPJSONTokenizerState stateInfo;
	if(SPJSONTokenizerInit(input,&stateInfo) == -1) return nil;
	
	NSCharacterSet *wsNlCharset = [NSCharacterSet whitespaceAndNewlineCharacterSet];
	NSMutableString *unformatted = [[NSMutableString alloc] init];
	
	for(;;) {
		SPJSONTokenInfo curToken;
		if(SPJSONTokenizerGetNextToken(&stateInfo,&curToken) == -1) {
			[unformatted release];
			return nil;
		}
		
		if(curToken.tok == JSON_TOK_EOF) break;
		
		if(!SPJSONAppendTokenText(unformatted, &stateInfo, &curToken, @", ", wsNlCharset)) {
			[unformatted release];
			return nil;
		}
	}
	
	return [unformatted autorelease];
}


@end

/**
 * Reads forward from the current position of the input and returns the first byte that does not have
 * its most significant bit set. The read position ends up directly behind the returned byte.
 *
 * Bytes with the most significant bit set (i.e. the bytes UTF-8 uses for multibyte sequences) are consumed
 * and skipped. That is fine for the tokenizer since all JSON control characters are single byte characters.
 *
 * stateInfo MUST be valid or this will crash!
 *
 * @return Either a char in the range 0-127 or -1 if the end of the input was reached before such a byte was found.
 */
char GetNextANSIChar(SPJSONTokenizerState *stateInfo) {
	while(stateInfo->pos < stateInfo->len) {
		char current = stateInfo->str[stateInfo->pos];
		stateInfo->pos++;
		// a cleared high bit means this is a plain single byte character
		if(!(current & 0x80))
			return current;
	}
	// ran out of input
	return -1;
}

/**
 * Sets up a tokenizer state so that reading starts at the first byte of input in the root context.
 *
 * The state only stores the pointer returned by -[NSString UTF8String], the bytes are not copied.
 * The state must therefore not be used for longer than that buffer is valid
 * (see the NSString documentation of UTF8String for its lifetime).
 *
 * @param input     The string to tokenize.
 * @param stateInfo The state to initialize. It is left untouched if this function fails.
 *
 * @return 0 on success, -1 if input is nil or does not respond to UTF8String, if stateInfo is NULL
 *         or if no UTF-8 representation of input could be obtained.
 */
int SPJSONTokenizerInit(NSString *input, SPJSONTokenizerState *stateInfo) {
	if(!input || ![input respondsToSelector:@selector(UTF8String)] || stateInfo == NULL)
		return -1;
	
	// fail right here instead of handing out a state that every later call would reject anyway
	const char *utf8 = [input UTF8String];
	if(utf8 == NULL)
		return -1;
	
	stateInfo->ctxt = JSON_ROOT_CONTEXT;
	stateInfo->pos = 0;
	stateInfo->str = utf8;
	// byte count (not character count) since the tokenizer works on the raw UTF-8 bytes
	stateInfo->len = [input lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
	
	return 0;
}

/**
 * Reads the next token from the input and advances the read position behind it.
 *
 * In the root context the structural characters " { } [ ] : , are reported as their own tokens and any bytes
 * between two of them are reported as a single JSON_TOK_OTHER token. A double quote switches to the string
 * context, in which everything up to the next double quote that is not preceded by a backslash (or up to the
 * end of input) is reported as a single JSON_TOK_STRINGDATA token, followed by JSON_TOK_DOUBLE_QUOTE for the
 * closing quote.
 *
 * tokenMatch->pos and tokenMatch->len are byte offsets/lengths within stateInfo->str.
 *
 * @param stateInfo  The tokenizer state (see SPJSONTokenizerInit()).
 * @param tokenMatch Receives the token that was found.
 *
 * @return 1 if a token was found, 0 if the end of input was reached (tokenMatch is JSON_TOK_EOF then; further
 *         calls will keep returning that), -1 if one of the arguments or the input buffer is NULL.
 */
int SPJSONTokenizerGetNextToken(SPJSONTokenizerState *stateInfo, SPJSONTokenInfo *tokenMatch) {
	if(tokenMatch == NULL || stateInfo == NULL || stateInfo->str == NULL)
		return -1;
	
	size_t posBefore = stateInfo->pos;
	do {
		char c = GetNextANSIChar(stateInfo);
		if(stateInfo->ctxt == JSON_STRING_CONTEXT) {
			//the only characters inside a string that are relevant to us are backslash and doublequote
			if(c == '"' || c == -1) {
				//a closing quote has been consumed by GetNextANSIChar() while EOF consumes nothing,
				//so the quote has to be subtracted to get the number of content bytes before the terminator
				size_t consumed = stateInfo->pos - posBefore;
				size_t contentLen = (c == '"') ? consumed - 1 : consumed;
				
				//if the string has contents, return that first
				if(contentLen > 0) {
					tokenMatch->tok = JSON_TOK_STRINGDATA;
					tokenMatch->pos = posBefore;
					if(c == '"')
						stateInfo->pos--; //rewind to read it again
					tokenMatch->len = stateInfo->pos - posBefore;
					return 1;
				}
				//string is terminated by EOF (invalid JSON)
				if(c == -1) {
					//switch to root context and try again to reach EOF branch below
					stateInfo->ctxt = JSON_ROOT_CONTEXT;
					continue;
				}
				stateInfo->ctxt = JSON_ROOT_CONTEXT;
				tokenMatch->tok = JSON_TOK_DOUBLE_QUOTE;
				tokenMatch->pos = posBefore;
				tokenMatch->len = stateInfo->pos - posBefore;
				return 1;
			}
			else if(c == '\\') {
				//for backslash we need to skip the next byte
				// We don't care for the value of the next byte since we don't really want to parse JSON, but only format it.
				// Thus we only have to pay attention to differentiate backslash-dquote and dquote.
				// If the backslash is the very last byte there is nothing to skip. Moving on anyway would push the read
				// position past the end of input and make the string token include the terminating NUL byte.
				if(stateInfo->pos < stateInfo->len)
					stateInfo->pos++;
			}
		}
		else if(c == -1) {
			//if there is still unreturned input, return that first
			if(posBefore < stateInfo->len) {
				tokenMatch->tok = JSON_TOK_OTHER;
				tokenMatch->pos = posBefore;
				tokenMatch->len = stateInfo->pos - posBefore;
				return 1;
			}
			tokenMatch->tok = JSON_TOK_EOF;
			tokenMatch->pos = stateInfo->pos; //EOF sits after the last character
			tokenMatch->len = 0; // EOF has no length
			return 0;
		}
		else {
			//JSON_TOK_EOF doubles as "no structural character found" marker here
			SPJSONToken tokFound = JSON_TOK_EOF;
			
			switch(c) {
				case '"':
					stateInfo->ctxt = JSON_STRING_CONTEXT;
					tokFound = JSON_TOK_DOUBLE_QUOTE;
					break;
					
				case '{':
					tokFound = JSON_TOK_CURLY_BRACE_OPEN;
					break;
					
				case '}':
					tokFound = JSON_TOK_CURLY_BRACE_CLOSE;
					break;
					
				case '[':
					tokFound = JSON_TOK_SQUARE_BRACE_OPEN;
					break;
					
				case ']':
					tokFound = JSON_TOK_SQUARE_BRACE_CLOSE;
					break;
					
				case ':':
					tokFound = JSON_TOK_COLON;
					break;
					
				case ',':
					tokFound = JSON_TOK_COMMA;
					break;
			}
			
			//if we found a token, but had to walk more than 1 char there was something else
			//between the previous token and this token, which we should report first
			if(tokFound != JSON_TOK_EOF && (stateInfo->pos - posBefore) > 1) {
				stateInfo->ctxt = JSON_ROOT_CONTEXT; //undo a context switch done for '"' above, the quote will be read again
				stateInfo->pos--; //rewind so we will read the token again next time
				tokFound = JSON_TOK_OTHER;
			}
			
			if(tokFound != JSON_TOK_EOF) {
				tokenMatch->tok = tokFound;
				tokenMatch->pos = posBefore;
				tokenMatch->len = stateInfo->pos - posBefore;
				return 1;
			}
		}
	} while(1);
}