aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBibiko <bibiko@eva.mpg.de>2009-05-14 15:50:53 +0000
committerBibiko <bibiko@eva.mpg.de>2009-05-14 15:50:53 +0000
commitbe4aac5c809f8f74c20f1d7b03a932e5ee0720df (patch)
treefc60de88fa04fe613f16c3b78b1a6f53a1eb1ebb
parentb60ee8e3720b0dac888f5d542869712a6c16e409 (diff)
downloadsequelpro-be4aac5c809f8f74c20f1d7b03a932e5ee0720df.tar.gz
sequelpro-be4aac5c809f8f74c20f1d7b03a932e5ee0720df.tar.bz2
sequelpro-be4aac5c809f8f74c20f1d7b03a932e5ee0720df.zip
• added SPSQLTokenizer
- this is an approach that uses lex to split a string into individual SQL queries very quickly, taking into account the "delimiter" switch as well as compound statements of the form CREATE ... BEGIN ... END; that are written without "delimiter"
-rw-r--r--Source/SPSQLTokenizer.h32
-rw-r--r--Source/SPSQLTokenizer.l134
-rw-r--r--sequel-pro.xcodeproj/project.pbxproj9
3 files changed, 175 insertions, 0 deletions
diff --git a/Source/SPSQLTokenizer.h b/Source/SPSQLTokenizer.h
new file mode 100644
index 00000000..7f459440
--- /dev/null
+++ b/Source/SPSQLTokenizer.h
@@ -0,0 +1,32 @@
+//
+// SPSQLTokenizer.h
+// sequel-pro
+//
+// Created by Hans-J. Bibiko on May 14, 2009
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// More info at <http://code.google.com/p/sequel-pro/>
+
+#define SP_SQL_TOKEN_DOUBLE_QUOTED_TEXT 1 // "..." string; the closing quote is optional in the scanner rule
+#define SP_SQL_TOKEN_SINGLE_QUOTED_TEXT 2 // '...' string; the closing quote is optional in the scanner rule
+#define SP_SQL_TOKEN_COMMENT 3 // any piece of a /* ... */ comment, or a "#" / "-- " line comment
+#define SP_SQL_TOKEN_BACKTICK_QUOTED_TEXT 4 // `...` text; the closing backtick is optional in the scanner rule
+#define SP_SQL_TOKEN_DELIM_START 5 // the keyword "delimiter" followed by whitespace
+#define SP_SQL_TOKEN_DELIM_VALUE 6 // the run of delimiter characters that follows SP_SQL_TOKEN_DELIM_START
+#define SP_SQL_TOKEN_DELIM_END 7 // whitespace, "delimiter", whitespace and ";" seen after a delimiter value
+#define SP_SQL_TOKEN_WHITESPACE 8 // a run of whitespace; also returned by the scanner's fallback rule
+#define SP_SQL_TOKEN_SEMICOLON 9 // a single ";"
+#define SP_SQL_TOKEN_COMPOUND 10 // a "create ... begin ... end;" compound statement matched as one token
diff --git a/Source/SPSQLTokenizer.l b/Source/SPSQLTokenizer.l
new file mode 100644
index 00000000..95d0f76c
--- /dev/null
+++ b/Source/SPSQLTokenizer.l
@@ -0,0 +1,134 @@
+%{
+
+/*
+ * SPSQLTokenizer.l
+ * sequel-pro
+ *
+ * Created by Hans-J. Bibiko on May 14, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * More info at <http://code.google.com/p/sequel-pro/>
+ */
+
+#import "SPSQLTokenizer.h"
+int utf8strlenfortoken(const char * _s); // defined after the rules section of this file
+int yyuoffset, yyuleng; // offset and length of the current token, both updated by YY_USER_ACTION below
+
+#define YY_NO_UNPUT
+
+// Runs for every matched rule: step the offset past the previous token, then measure the new token in UTF-8 characters (not bytes)
+#define YY_USER_ACTION { yyuoffset += yyuleng; yyuleng = utf8strlenfortoken(yytext); }
+%}
+%option prefix="to"
+%option noyywrap
+%option case-insensitive
+
+s [ \t\n\r]
+dkey "delimiter"
+scol ";"
+dval [!-゚]
+compound "create"(.|\n|\r)+?"begin"(.|\n|\r)+?{s}+"end"{s}*?{scol}
+%x comment
+%x delim
+%x delimbody
+
+%%
+
+\"([^"\\]|\\(.|[\n\r]))*\"? { return SP_SQL_TOKEN_DOUBLE_QUOTED_TEXT; }
+'([^'\\]|\\(.|[\n\r]))*'? { return SP_SQL_TOKEN_SINGLE_QUOTED_TEXT; }
+`[^`]*`? { return SP_SQL_TOKEN_BACKTICK_QUOTED_TEXT; }
+
+"/*" { BEGIN(comment); return SP_SQL_TOKEN_COMMENT; }
+<comment>[^*]* { return SP_SQL_TOKEN_COMMENT; }
+<comment>"*"+ { return SP_SQL_TOKEN_COMMENT; }
+<comment>"*"+"/" { BEGIN(INITIAL); return SP_SQL_TOKEN_COMMENT; }
+#[^\n\r]*(\n|\r)? |
+--[ \t][^\n\r]*(\n|\r)? { return SP_SQL_TOKEN_COMMENT; }
+
+{s}+ { return SP_SQL_TOKEN_WHITESPACE; }
+
+{dkey}{s}+ { BEGIN(delim); return SP_SQL_TOKEN_DELIM_START; }
+<delim>{dval}+ { BEGIN(delimbody); return SP_SQL_TOKEN_DELIM_VALUE; }
+<delimbody>{s}+{dkey}{s}+{scol} { BEGIN(INITIAL); return SP_SQL_TOKEN_DELIM_END; }
+
+{compound} { return SP_SQL_TOKEN_COMPOUND; }
+
+{scol} { return SP_SQL_TOKEN_SEMICOLON; }
+(.|\n|\r) { /* fallback: consume any other single character ("." is a literal dot inside [...], so a bracket expression cannot serve as a catch-all) */ return SP_SQL_TOKEN_WHITESPACE; }
+
+
+
+
+
+<<EOF>> {
+ BEGIN(INITIAL); /* make sure we return to initial state when finished! */
+ yy_delete_buffer(YY_CURRENT_BUFFER);
+ return 0;
+ }
+%%
+#define ONEMASK ((size_t)(-1) / 0xFF) // a size_t in which every byte equals 0x01
+// Returns the number of UTF-8 characters (not bytes) in the NUL-terminated string _s: bytes scanned minus continuation bytes; adapted from http://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html
+int utf8strlenfortoken(const char * _s)
+{
+ const char * s;
+ size_t count = 0; // number of continuation bytes seen so far
+ size_t u;
+ unsigned char b;
+
+ /* Walk byte by byte until s is aligned to a sizeof(size_t) boundary. */
+ for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) {
+ b = *s;
+
+ /* The string ended before an aligned address was reached. */
+ if (b == '\0')
+ goto done;
+
+ /* Adds 1 when b has the form 10xxxxxx, i.e. it is NOT the first byte of a character. */
+ count += (b >> 7) & ((~b) >> 6);
+ }
+
+ /* Process the string one size_t-sized word at a time. */
+ for (; ; s += sizeof(size_t)) {
+ /* Prefetch 256 bytes ahead. */
+ __builtin_prefetch(&s[256], 0, 0);
+
+ /* Grab 4 or 8 bytes of UTF-8 data. NOTE(review): this word-sized load can read bytes beyond the terminating NUL and type-puns char data as size_t - confirm this is acceptable for the target compilers. */
+ u = *(size_t *)(s);
+
+ /* Leave the loop as soon as the word contains a zero byte; the tail loop below finds its exact position. */
+ if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80))
+ break;
+
+ /* Set the low bit of every byte of the form 10xxxxxx, then sum those bits into the top byte and add it to count. */
+ u = ((u & (ONEMASK * 0x80)) >> 7) & ((~u) >> 6);
+ count += (u * ONEMASK) >> ((sizeof(size_t) - 1) * 8);
+ }
+
+ /* Finish byte by byte from the start of the word in which a zero byte was detected. */
+ for (; ; s++) {
+ b = *s;
+
+ /* Stop at the terminating NUL. */
+ if (b == '\0')
+ break;
+
+ /* Adds 1 when b has the form 10xxxxxx, i.e. it is NOT the first byte of a character. */
+ count += (b >> 7) & ((~b) >> 6);
+ }
+
+done:
+ return ((s - _s) - count); // bytes scanned minus continuation bytes = number of characters
+}
diff --git a/sequel-pro.xcodeproj/project.pbxproj b/sequel-pro.xcodeproj/project.pbxproj
index 2a5417d8..1efc424b 100644
--- a/sequel-pro.xcodeproj/project.pbxproj
+++ b/sequel-pro.xcodeproj/project.pbxproj
@@ -125,6 +125,7 @@
B5EAC0FD0EC87FF900CC579C /* Security.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B5EAC0FC0EC87FF900CC579C /* Security.framework */; };
B5F4F7810F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff in Resources */ = {isa = PBXBuildFile; fileRef = B5F4F7800F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff */; };
BC2C8E220FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html in Resources */ = {isa = PBXBuildFile; fileRef = BC2C8E210FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html */; };
+ BCD0AD490FBBFC340066EA5C /* SPSQLTokenizer.l in Sources */ = {isa = PBXBuildFile; fileRef = BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@@ -330,6 +331,8 @@
B5EAC0FC0EC87FF900CC579C /* Security.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Security.framework; path = System/Library/Frameworks/Security.framework; sourceTree = SDKROOT; };
B5F4F7800F7BCF990059AE84 /* toolbar-switch-to-procedures.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "toolbar-switch-to-procedures.tiff"; sourceTree = "<group>"; };
BC2C8E210FA8C2DB008468C7 /* sequel-pro-mysql-help-template.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = "sequel-pro-mysql-help-template.html"; sourceTree = "<group>"; };
+ BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.lex; path = SPSQLTokenizer.l; sourceTree = "<group>"; };
+ BCD0AD4A0FBBFC480066EA5C /* SPSQLTokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SPSQLTokenizer.h; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -702,6 +705,8 @@
58FEF16C0F23D66600518E8E /* SPSQLParser.m */,
179F15040F7C433C00579954 /* SPEditorTokens.h */,
179F15050F7C433C00579954 /* SPEditorTokens.l */,
+ BCD0AD480FBBFC340066EA5C /* SPSQLTokenizer.l */,
+ BCD0AD4A0FBBFC480066EA5C /* SPSQLTokenizer.h */,
);
name = Parsing;
sourceTree = "<group>";
@@ -901,6 +906,7 @@
296DC8BB0F909194002A3258 /* NSDictionary_DeepMutableCopy.m in Sources */,
296DC8BC0F909194002A3258 /* MGTemplateStandardFilters.m in Sources */,
5841423F0F97E11000A34B47 /* NoodleLineNumberView.m in Sources */,
+ BCD0AD490FBBFC340066EA5C /* SPSQLTokenizer.l in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -1066,6 +1072,9 @@
GCC_WARN_UNUSED_VARIABLE = YES;
IBC_FLATTEN_NIBS = NO;
IBC_NOTICES = NO;
+ LEXFLAGS = "";
+ LEX_INSERT_LINE_DIRECTIVES = YES;
+ LEX_SUPPRESS_DEFAULT_RULE = NO;
ONLY_ACTIVE_ARCH = YES;
PREBINDING = NO;
SDKROOT = macosx10.5;