diff options
-rw-r--r-- | Source/SPSQLTokenizer.h | 32 | ||||
-rw-r--r-- | Source/SPSQLTokenizer.l | 110 | ||||
-rw-r--r-- | sequel-pro.xcodeproj/project.pbxproj | 9 |
3 files changed, 151 insertions, 0 deletions
diff --git a/Source/SPSQLTokenizer.h b/Source/SPSQLTokenizer.h
new file mode 100644
index 00000000..7f459440
--- /dev/null
+++ b/Source/SPSQLTokenizer.h
@@ -0,0 +1,32 @@
+//
+// SPSQLTokenizer.h
+// sequel-pro
+//
+// Created by Hans-J. Bibiko on May 14, 2009
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// More info at <http://code.google.com/p/sequel-pro/>
+
+#define SP_SQL_TOKEN_DOUBLE_QUOTED_TEXT 1
+#define SP_SQL_TOKEN_SINGLE_QUOTED_TEXT 2
+#define SP_SQL_TOKEN_COMMENT 3
+#define SP_SQL_TOKEN_BACKTICK_QUOTED_TEXT 4
+#define SP_SQL_TOKEN_DELIM_START 5
+#define SP_SQL_TOKEN_DELIM_VALUE 6
+#define SP_SQL_TOKEN_DELIM_END 7
+#define SP_SQL_TOKEN_WHITESPACE 8
+#define SP_SQL_TOKEN_SEMICOLON 9
+#define SP_SQL_TOKEN_COMPOUND 10
diff --git a/Source/SPSQLTokenizer.l b/Source/SPSQLTokenizer.l
new file mode 100644
index 00000000..95d0f76c
--- /dev/null
+++ b/Source/SPSQLTokenizer.l
@@ -0,0 +1,110 @@
+%{
+
+/*
+ * SPSQLTokenizer.l
+ * sequel-pro
+ *
+ * Created by Hans-J. Bibiko on May 14, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * More info at <http://code.google.com/p/sequel-pro/>
+ */
+
+#import "SPSQLTokenizer.h"
+
+/* Returns the number of UTF-8 characters in a NUL-terminated string (defined below). */
+int utf8strlenfortoken(const char * _s);
+
+/* Character (not byte) offset and length of the current token; see YY_USER_ACTION. */
+int yyuoffset, yyuleng;
+
+#define YY_NO_UNPUT
+
+//keep track of the current utf-8 character (not byte) offset and token length
+#define YY_USER_ACTION { yyuoffset += yyuleng; yyuleng = utf8strlenfortoken(yytext); }
+%}
+%option prefix="to"
+%option noyywrap
+%option case-insensitive
+
+s [ \t\n\r]
+dkey "delimiter"
+scol ";"
+dval [!-゚]
+/* Any single character, line breaks included. Inside a bracket expression
+ * '.' is a literal dot, so "[.\r\n]" would match only '.', CR and LF. */
+any (.|\n|\r)
+/* NOTE(review): flex has no non-greedy quantifiers; "+?" and "*?" below parse
+ * as an optional greedy repetition, so this pattern runs to the last
+ * "end ;" it can reach -- confirm that is acceptable to the caller. */
+compound "create"(.|\n|\r)+?"begin"(.|\n|\r)+?{s}+"end"{s}*?{scol}
+%x comment
+%x delim
+%x delimbody
+
+%%
+
+\"([^"\\]|\\(.|[\n\r]))*\"? { return SP_SQL_TOKEN_DOUBLE_QUOTED_TEXT; }
+'([^'\\]|\\(.|[\n\r]))*'? { return SP_SQL_TOKEN_SINGLE_QUOTED_TEXT; }
+`[^`]*`? { return SP_SQL_TOKEN_BACKTICK_QUOTED_TEXT; }
+
+"/*" { BEGIN(comment); return SP_SQL_TOKEN_COMMENT; }
+<comment>[^*]* { return SP_SQL_TOKEN_COMMENT; }
+<comment>"*"+ { return SP_SQL_TOKEN_COMMENT; }
+<comment>"*"+"/" { BEGIN(INITIAL); return SP_SQL_TOKEN_COMMENT; }
+#[^\n\r]*(\n|\r)? |
+--[ \t][^\n\r]*(\n|\r)? { return SP_SQL_TOKEN_COMMENT; }
+
+{s}+ { return SP_SQL_TOKEN_WHITESPACE; }
+
+{dkey}{s}+ { BEGIN(delim); return SP_SQL_TOKEN_DELIM_START; }
+<delim>{dval}+ { BEGIN(delimbody); return SP_SQL_TOKEN_DELIM_VALUE; }
+<delimbody>{s}+{dkey}{s}+{scol} { BEGIN(INITIAL); return SP_SQL_TOKEN_DELIM_END; }
+
+{compound} { return SP_SQL_TOKEN_COMPOUND; }
+
+{scol} { return SP_SQL_TOKEN_SEMICOLON; }
+{any} { return SP_SQL_TOKEN_WHITESPACE; }
+
+<<EOF>> {
+	BEGIN(INITIAL); /* make sure we return to initial state when finished! */
+	yy_delete_buffer(YY_CURRENT_BUFFER);
+	return 0;
+}
+%%
+/*
+ * Returns the number of UTF-8 characters in the NUL-terminated string _s.
+ *
+ * Continuation bytes (10xxxxxx) never start a character, so the result is
+ * the count of bytes before the terminator that are not continuation bytes.
+ *
+ * This is deliberately a plain byte loop: scanning a machine word at a time
+ * needs type-punned loads that can read beyond the terminating NUL.
+ */
+int utf8strlenfortoken(const char * _s)
+{
+	const unsigned char *p = (const unsigned char *)_s;
+	int chars = 0;
+
+	for (; *p != '\0'; p++) {
+		/* Skip continuation bytes; every other byte starts a character. */
+		if ((*p & 0xC0) != 0x80) {
+			chars++;
+		}
+	}
+
+	return chars;
+}
diff --git a/sequel-pro.xcodeproj/project.pbxproj b/sequel-pro.xcodeproj/project.pbxproj
index 2a5417d8..1efc424b 100644
--- a/sequel-pro.xcodeproj/project.pbxproj
+++ b/sequel-pro.xcodeproj/project.pbxproj
@@ -125,6 +125,7 @@
 		B5EAC0FD0EC87FF900CC579C /* Security.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B5EAC0FC0EC87FF900CC579C /* Security.framework */; };
 		B5F4F7810F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff in Resources */ = {isa = PBXBuildFile; fileRef = B5F4F7800F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff */; };
 		BC2C8E220FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html in Resources */ = {isa = PBXBuildFile; fileRef = BC2C8E210FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html */; };
+		BCD0AD490FBBFC340066EA5C /* SPSQLTokenizer.l in Sources */ = {isa = PBXBuildFile; fileRef = BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXCopyFilesBuildPhase section */
@@ -330,6 +331,8 @@
 		B5EAC0FC0EC87FF900CC579C /* Security.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Security.framework; path = System/Library/Frameworks/Security.framework; sourceTree = SDKROOT; };
 		B5F4F7800F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "toolbar-switch-to-procedures.tiff"; sourceTree = "<group>"; };
 		BC2C8E210FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = "sequel-pro-mysql-help-template.html"; sourceTree = "<group>"; };
+		BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.lex; path = SPSQLTokenizer.l; sourceTree = "<group>"; };
+		BCD0AD4A0FBBFC480066EA5C /* SPSQLTokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SPSQLTokenizer.h; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -702,6 +705,8 @@
 				58FEF16C0F23D66600518E8E /* SPSQLParser.m */,
 				179F15040F7C433C00579954 /* SPEditorTokens.h */,
 				179F15050F7C433C00579954 /* SPEditorTokens.l */,
+				BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */,
+				BCD0AD4A0FBBFC480066EA5C /* SPSQLTokenizer.h */,
 			);
 			name = Parsing;
 			sourceTree = "<group>";
@@ -901,6 +906,7 @@
 				296DC8BB0F909194002A3258 /* NSDictionary_DeepMutableCopy.m in Sources */,
 				296DC8BC0F909194002A3258 /* MGTemplateStandardFilters.m in Sources */,
 				5841423F0F97E11000A34B47 /* NoodleLineNumberView.m in Sources */,
+				BCD0AD490FBBFC340066EA5C /* SPSQLTokenizer.l in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -1066,6 +1072,9 @@
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				IBC_FLATTEN_NIBS = NO;
 				IBC_NOTICES = NO;
+				LEXFLAGS = "";
+				LEX_INSERT_LINE_DIRECTIVES = YES;
+				LEX_SUPPRESS_DEFAULT_RULE = NO;
 				ONLY_ACTIVE_ARCH = YES;
 				PREBINDING = NO;
 				SDKROOT = macosx10.5;