From be4aac5c809f8f74c20f1d7b03a932e5ee0720df Mon Sep 17 00:00:00 2001 From: Bibiko Date: Thu, 14 May 2009 15:50:53 +0000 Subject: =?UTF-8?q?=E2=80=A2=20added=20SPSQLTokenizer=20-=20this=20is=20an?= =?UTF-8?q?=20approach=20to=20make=20usage=20of=20lex=20to=20split=20a=20s?= =?UTF-8?q?tring=20very=20fast=20into=20SQL=20queries=20considering=20the?= =?UTF-8?q?=20"delimiter"=20switch=20and=20compound-statements=20via=20CRE?= =?UTF-8?q?ATE=20...=20BEGIN=20...=20END;=20without=20using=20"delimiter"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/SPSQLTokenizer.h | 32 +++++++++ Source/SPSQLTokenizer.l | 134 +++++++++++++++++++++++++++++++++++ sequel-pro.xcodeproj/project.pbxproj | 9 +++ 3 files changed, 175 insertions(+) create mode 100644 Source/SPSQLTokenizer.h create mode 100644 Source/SPSQLTokenizer.l diff --git a/Source/SPSQLTokenizer.h b/Source/SPSQLTokenizer.h new file mode 100644 index 00000000..7f459440 --- /dev/null +++ b/Source/SPSQLTokenizer.h @@ -0,0 +1,32 @@ +// +// SPSQLTokenizer.h +// sequel-pro +// +// Created by Hans-J. Bibiko on May 14, 2009 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// More info at + +#define SP_SQL_TOKEN_DOUBLE_QUOTED_TEXT 1 /* text in double quotes; the closing quote may be missing */ +#define SP_SQL_TOKEN_SINGLE_QUOTED_TEXT 2 /* text in single quotes; the closing quote may be missing */ +#define SP_SQL_TOKEN_COMMENT 3 /* a piece of a slash-star, # or "-- " comment */ +#define SP_SQL_TOKEN_BACKTICK_QUOTED_TEXT 4 /* text in backticks; the closing backtick may be missing */ +#define SP_SQL_TOKEN_DELIM_START 5 /* the "delimiter" keyword and the whitespace after it */ +#define SP_SQL_TOKEN_DELIM_VALUE 6 /* a run of {dval} characters (the delimiter value) */ +#define SP_SQL_TOKEN_DELIM_END 7 /* whitespace, "delimiter", whitespace and ";" */ +#define SP_SQL_TOKEN_WHITESPACE 8 /* a run of spaces, tabs, CRs or LFs (also returned by the [.\r\n] rule) */ +#define SP_SQL_TOKEN_SEMICOLON 9 /* a single ";" */ +#define SP_SQL_TOKEN_COMPOUND 10 /* "create" ... "begin" ... "end" ";" matched as one token */ diff --git a/Source/SPSQLTokenizer.l b/Source/SPSQLTokenizer.l new file mode 100644 index 00000000..95d0f76c --- /dev/null +++ b/Source/SPSQLTokenizer.l @@ -0,0 +1,134 @@ +%{ + +/* + * SPSQLTokenizer.l + * sequel-pro + * + * Created by Hans-J. Bibiko on May 14, 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * More info at + */ + +#import "SPSQLTokenizer.h" +int utf8strlenfortoken(const char * _s); +int yyuoffset, yyuleng; + +#define YY_NO_UNPUT + +//keep track of the current utf-8 character (not byte) offset and token length; runs before the action of every matched rule +#define YY_USER_ACTION { yyuoffset += yyuleng; yyuleng = utf8strlenfortoken(yytext); } +%} +%option prefix="to" +%option noyywrap +%option case-insensitive + +s [ \t\n\r] +dkey "delimiter" +scol ";" +dval [!-゚] +compound "create"(.|\n|\r)+?"begin"(.|\n|\r)+?{s}+"end"{s}*?{scol} +%x comment +%x delim +%x delimbody + +%% + +\"([^"\\]|\\(.|[\n\r]))*\"? { return SP_SQL_TOKEN_DOUBLE_QUOTED_TEXT; } +'([^'\\]|\\(.|[\n\r]))*'? { return SP_SQL_TOKEN_SINGLE_QUOTED_TEXT; } +`[^`]*`? { return SP_SQL_TOKEN_BACKTICK_QUOTED_TEXT; } + +"/*" { /* enter the exclusive comment state */ BEGIN(comment); return SP_SQL_TOKEN_COMMENT; } +<comment>[^*]* { return SP_SQL_TOKEN_COMMENT; } +<comment>"*"+ { return SP_SQL_TOKEN_COMMENT; } +<comment>"*"+"/" { /* comment closed: back to normal scanning */ BEGIN(INITIAL); return SP_SQL_TOKEN_COMMENT; } +#[^\n\r]*(\n|\r)? | +--[ \t][^\n\r]*(\n|\r)? { return SP_SQL_TOKEN_COMMENT; } + +{s}+ { return SP_SQL_TOKEN_WHITESPACE; } + +{dkey}{s}+ { /* "delimiter" keyword seen: its value is scanned in the delim state */ BEGIN(delim); return SP_SQL_TOKEN_DELIM_START; } +<delim>{dval}+ { BEGIN(delimbody); return SP_SQL_TOKEN_DELIM_VALUE; } +<delimbody>{s}+{dkey}{s}+{scol} { /* closing "delimiter ;" returns to normal scanning */ BEGIN(INITIAL); return SP_SQL_TOKEN_DELIM_END; } + +{compound} { return SP_SQL_TOKEN_COMPOUND; } + +{scol} { return SP_SQL_TOKEN_SEMICOLON; } +[.\r\n] { /* NOTE(review): inside [] the dot is literal, so this matches only '.', CR or LF and is not a catch-all - confirm intent */ return SP_SQL_TOKEN_WHITESPACE; } + + + + + +<<EOF>> { + BEGIN(INITIAL); /* make sure we return to initial state when finished! 
*/ + yy_delete_buffer(YY_CURRENT_BUFFER); + return 0; + } +%% +#define ONEMASK ((size_t)(-1) / 0xFF) /* a size_t with 0x01 in every byte */ +// Returns the number of UTF-8 characters (not bytes) before the terminating zero byte of _s, computed as bytes scanned minus continuation bytes (10xxxxxx); adapted from http://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html +int utf8strlenfortoken(const char * _s) +{ + const char * s; + size_t count = 0; + size_t u; + unsigned char b; + + /* Handle any initial misaligned bytes one at a time, until s is aligned to sizeof(size_t). */ + for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) { + b = *s; + + /* Exit if we hit a zero byte: the string ended before the first aligned word. */ + if (b == '\0') + goto done; + + /* Is this byte NOT the first byte of a character, i.e. bit 7 set and bit 6 clear? */ + count += (b >> 7) & ((~b) >> 6); + } + + /* Handle complete blocks, one size_t-sized word per iteration. */ + for (; ; s += sizeof(size_t)) { + /* Prefetch 256 bytes ahead. */ + __builtin_prefetch(&s[256], 0, 0); + + /* Grab 4 or 8 bytes (sizeof(size_t)) of UTF-8 data in a single word read. */ + u = *(size_t *)(s); + + /* Exit the loop if there are any zero bytes in this word; the byte-wise loop below then finds the terminator. */ + if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80)) + break; + + /* Count bytes which are NOT the first byte of a character: leave a 1 in each byte that has bit 7 set and bit 6 clear, then sum those per-byte flags into the top byte with the multiply. */ + u = ((u & (ONEMASK * 0x80)) >> 7) & ((~u) >> 6); + count += (u * ONEMASK) >> ((sizeof(size_t) - 1) * 8); + } + + /* Take care of any left-over bytes, up to the terminating zero byte. */ + for (; ; s++) { + b = *s; + + /* Exit if we hit a zero byte. */ + if (b == '\0') + break; + + /* Is this byte NOT the first byte of a character, i.e. bit 7 set and bit 6 clear? 
*/ + count += (b >> 7) & ((~b) >> 6); + } + +done: + return ((s - _s) - count); /* bytes scanned minus continuation bytes = character count */ +} diff --git a/sequel-pro.xcodeproj/project.pbxproj b/sequel-pro.xcodeproj/project.pbxproj index 2a5417d8..1efc424b 100644 --- a/sequel-pro.xcodeproj/project.pbxproj +++ b/sequel-pro.xcodeproj/project.pbxproj @@ -125,6 +125,7 @@ B5EAC0FD0EC87FF900CC579C /* Security.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B5EAC0FC0EC87FF900CC579C /* Security.framework */; }; B5F4F7810F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff in Resources */ = {isa = PBXBuildFile; fileRef = B5F4F7800F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff */; }; BC2C8E220FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html in Resources */ = {isa = PBXBuildFile; fileRef = BC2C8E210FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html */; }; + BCD0AD490FBBFC340066EA5C /* SPSQLTokenizer.l in Sources */ = {isa = PBXBuildFile; fileRef = BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -330,6 +331,8 @@ B5EAC0FC0EC87FF900CC579C /* Security.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Security.framework; path = System/Library/Frameworks/Security.framework; sourceTree = SDKROOT; }; B5F4F7800F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "toolbar-switch-to-procedures.tiff"; sourceTree = ""; }; BC2C8E210FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = "sequel-pro-mysql-help-template.html"; sourceTree = ""; }; + BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.lex; path = SPSQLTokenizer.l; sourceTree = ""; }; + BCD0AD4A0FBBFC480066EA5C /* SPSQLTokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
SPSQLTokenizer.h; sourceTree = "<group>"; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -702,6 +705,8 @@ 58FEF16C0F23D66600518E8E /* SPSQLParser.m */, 179F15040F7C433C00579954 /* SPEditorTokens.h */, 179F15050F7C433C00579954 /* SPEditorTokens.l */, + BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */, + BCD0AD4A0FBBFC480066EA5C /* SPSQLTokenizer.h */, ); name = Parsing; sourceTree = "<group>"; @@ -901,6 +906,7 @@ 296DC8BB0F909194002A3258 /* NSDictionary_DeepMutableCopy.m in Sources */, 296DC8BC0F909194002A3258 /* MGTemplateStandardFilters.m in Sources */, 5841423F0F97E11000A34B47 /* NoodleLineNumberView.m in Sources */, + BCD0AD490FBBFC340066EA5C /* SPSQLTokenizer.l in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1066,6 +1072,9 @@ GCC_WARN_UNUSED_VARIABLE = YES; IBC_FLATTEN_NIBS = NO; IBC_NOTICES = NO; + LEXFLAGS = ""; + LEX_INSERT_LINE_DIRECTIVES = YES; + LEX_SUPPRESS_DEFAULT_RULE = NO; ONLY_ACTIVE_ARCH = YES; PREBINDING = NO; SDKROOT = macosx10.5; -- cgit v1.2.3