From 60a5d64518f09af80059e2e293849ff6b0d6be75 Mon Sep 17 00:00:00 2001 From: Max Date: Sun, 1 Feb 2015 20:19:52 +0100 Subject: Change lexer definition of high bytes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit flex does not(*) support UTF-8, therefore alpha [a-z_\.À-゚] has always been interpreted by flex as alpha [a-z_\.\xC3\x80-\xEF\xBE\x9F] I assume this is not what was indetend and the only reason it worked, is because C3 (195),BE (190) and 9F (159) are already covered by 80-EF (128-239). Incidentally this range would also cover the whole Unicode BMP in UTF8. This change should make it more obvious. (*) There were some patches in 2012 and 2014 but they don't seem to have been merged. --- Source/SPEditorTokens.l | 6 +++--- Source/SPSQLTokenizer.l | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/SPEditorTokens.l b/Source/SPEditorTokens.l index e2d3099a..2ac99be8 100644 --- a/Source/SPEditorTokens.l +++ b/Source/SPEditorTokens.l @@ -56,11 +56,11 @@ size_t yyuoffset, yyuleng; %option case-insensitive s [ \t\n\r]+ -alpha [a-z_\.À-゚] +alpha [a-z_\.\x80-\xEF] numeric ([+-]?(([0-9]+\.[0-9]+)|([0-9]*\.[0-9]+)|([0-9]+))(e[+-]?[0-9]+)?) ops "+"|"-"|"*"|"/" -word [a-z_\.0-9À-゚@] -variable @{1,2}[a-z_\.0-9À-゚$]+ +word [a-z_\.0-9\x80-\xEF@] +variable @{1,2}[a-z_\.0-9\x80-\xEF$]+ keyworda (G(R(OUP{s}BY|ANT(S)?)|E(NERAL|T_FORMAT|OMETRY(COLLECTION)?)|LOBAL)|B(Y(TE)?|TREE|I(GINT|N(LOG|ARY)|T)|O(TH|OL(EAN)?)|E(GIN|TWEEN|FORE)|LOB|ACKUP{s}TABLE)|H(IGH_PRIORITY|O(ST(S)?|UR(_(MI(NUTE|CROSECOND)|SECOND))?)|ELP|A(SH|NDLER|VING))|C(R(OSS|EATE)|H(ECK(SUM)?|A(R(SET|ACTER)?|NGE(D)?|IN))|IPHER|O(M(M(IT(TED)?|ENT)|P(RESSED|LETION|ACT))|N(S(TRAINT(_(SCHEMA|NAME|CATALOG))?|ISTENT)|NECTION|CURRENT|T(RIBUTORS|INUE|AINS)|DITION|VERT)|DE|L(UMN(S|_(NAME|FORMAT))?|LATE)|ALESCE{s}PARTITION)|U(R(RENT_(TIME(STAMP)?|DATE|USER)|SOR(_NAME)?)|BE)|L(IENT|OSE|ASS_ORIGIN)|A(S(CADE(D)?|E)|CHE{s}INDEX|TALOG_NAME|LL))|I(GNORE(_SERVER_IDS)?|MPORT{s}TABLESPACE|S(SUER|OLATION)?|N(S(TALL({s}PLUGIN)?|E(RT(_METHOD)?|NSITIVE))|N(O(BASE|DB)|ER)|T(1|2|8|3|O({s}(DUMP|OUT)FILE)?|4|E(RVAL|GER))?|ITIAL_SIZE|OUT|DEX(ES)?|VOKER|FILE)?|TERATE|O_THREAD|DENTIFIED|F)|D(ROP|YNAMIC|I(RECTORY|S(CARD{s}TABLESPACE|TINCT(ROW)?|K|ABLE{s}KEYS)|V)|O(UBLE)?|U(MPFILE|PLICATE|AL)|E(S(C(RIBE)?|_KEY_FILE)|C(IMAL|LARE)?|TERMINISTIC|F(INER|AULT)|L(ETE|AY(_KEY_WRITE|ED))|ALLOCATE)|A(Y(_(MI(NUTE|CROSECOND)|SECOND|HOUR))?|T(E(TIME)?|A(BASE(S)?|FILE)?)))|JOIN|E(RRORS|X(TEN(T_SIZE|DED)|I(STS|T)|P(LAIN|ANSION)|ECUTE)|SCAPE(D{s}BY)?|N(GINE(S)?|CLOSED{s}BY|D(S)?|UM|ABLE{s}KEYS)|VE(RY|NT)|LSE(IF)?|ACH)|K(ILL({s}(CONNECTION|QUERY))?|EY(S|_BLOCK_SIZE)?)|F(R(OM|AC_SECOND)|I(RST|XED|LE)|O(R(CE|EIGN)?|UND)|U(NCTION|LL(TEXT)?)|ETCH|L(OAT(8|4)?|USH)|A(ST|LSE))|A(G(GREGATE|AINST)|S(C(II)?|ENSITIVE)?|N(Y|D|ALYZE)|C(CESSIBLE|TION)|T|DD|UT(HORS|O(_INCREMENT|EXTEND_SIZE))|VG(_ROW_LENGTH)?|FTER|L(GORITHM|TER|L))) keywordl (R(TREE|IGHT|O(UTINE|W(S|_FORMAT)?|LL(BACK|UP))|E(GEXP|MOVE{s}PARTITIONING|BUILD{s}PARTITION|S(T(RICT|ORE{s}TABLE)|IGNAL|UME|ET)|NAME|COVER|TURN(S)?|ORGANIZE{s}PARTITION|D(O(_BUFFER_SIZE|FILE)|UNDANT)|P(EAT(ABLE)?|L(ICATION|ACE)|AIR)|VOKE|QUIRE|FERENCES|L(OAD|EASE|AY_(THREAD|LOG_(POS|FILE)))|A(D(S|_(ONLY|WRITE))?|L))|LIKE|ANGE)|M(YSQL_ERRNO|I(GRATE|N(_ROWS|UTE(_(MICROSECOND|SECOND))?)|CROSECOND|DDLEINT)|O(NTH|D(IF(Y|IES)|E)?)|U(TEX|LTI(PO(INT|LYGON)|LINESTRING))|E(RGE|MORY|SSAGE_TEXT|DIUM(BLOB|TEXT|INT)?)|A(X(_(ROWS|SIZE|CONNECTIONS_PER_HOUR|U(SER_CONNECTIONS|PDATES_PER_HOUR)|QUERIES_PER_HOUR)|VALUE)|STER(_(S(SL(_(C(IPHER|ERT|A(PATH)?)|VERIFY_SERVER_CERT|KEY))?|ERVER_ID)|H(OST|EARTBEAT_PERIOD)|CONNECT_RETRY|USER|P(ORT|ASSWORD)|LOG_(POS|FILE)))?|TCH))|N(CHAR|O(NE|_W(RITE_TO_BINLOG|AIT)|T|DEGROUP)?|DB(CLUSTER)?|U(MERIC|LL)|E(XT|W)|VARCHAR|A(ME(S)?|T(IONAL|URAL)))|O(R(DER{s}BY)?|N(({s}DUPLICATE{s}KEY{s}UPDATE)?|E(_SHOT)?|LINE)?|UT(ER|FILE)?|P(TI(MIZE|ON(S|ALLY)?)|EN)|FF(SET|LINE)|WNER|LD_PASSWORD)|P(R(I(MARY|VILEGES)|OCE(SS|DURE{s}(ANALYSE)?)|E(SERVE|CISION|PARE|V))|HASE|O(RT|INT|LYGON)|URGE|A(R(SER|TI(TION(S|ING)?|AL))|SSWORD|CK_KEYS))|QU(ICK|ERY|ARTER)|L(I(MIT|ST|NE(S(TRING)?|AR)|KE)|O(G(S|FILE({s}GROUP))|NG(BLOB|TEXT)?|C(K(S)?|AL(TIME(STAMP)?)?)|OP|W_PRIORITY|AD{s}(DATA|INDEX{s}INTO{s}CACHE|XML))|E(SS|VEL|FT|A(DING|VE(S)?))|A(ST|NGUAGE))) keywords (X(OR|509|A)|S(MALLINT|SL|H(OW({s}(E(NGINE(S)?|RRORS)|M(ASTER|UTEX)|BINLOG|GRANTS|INNODB|P(RIVILEGES|ROFILE(S)?|ROCEDURE{s}CODE)|SLAVE{s}(HOSTS|STATUS)|TRIGGERS|VARIABLES|WARNINGS|(FULL{s})?PROCESSLIST|FIELDS|PLUGIN(S)?|STORAGE{s}ENGINES|TABLE{s}TYPES|CO(LUMNS|LLATION)|BINLOG{s}EVENTS))?|UTDOWN|ARE)|NAPSHOT|CHE(MA(S|_NAME)?|DULE(R)?)|T(R(ING|AIGHT_JOIN)|O(RAGE|P)|A(RT(S|ING{s}BY)?|TUS))|I(GN(ED|AL)|MPLE)|O(ME|NAME|CKET|UNDS)|U(B(CLASS_ORIGIN|JECT|PARTITION(S)?)|SPEND|PER)|P(ECIFIC|ATIAL)|E(R(IAL(IZABLE)?|VER)|SSION|NSITIVE|C(OND(_MICROSECOND)?|URITY)|T({s}(PASSWORD|NAMES|ONE_SHOT))?|PARATOR|LECT)|QL(STATE|_(MAX_JOIN_SIZE|B(IG_(RESULT|SELECTS|TABLES)|UFFER_RESULT)|S(MALL_RESULT|ELECT_LIMIT|LAVE_SKIP_COUNTER|AFE_UPDATES)|NO_CACHE|CA(CHE|LC_FOUND_ROWS)|T(SI_(M(INUTE|ONTH)|SECOND|HOUR|YEAR|DAY|QUARTER|FRAC_SECOND|WEEK)|HREAD)|QUOTE_SHOW_CREATE|WARNINGS|LO(G_(BIN|OFF|UPDATE)|W_PRIORITY_UPDATES)|AUTO_IS_NULL)|EXCEPTION|WARNING)?|L(OW|AVE)|AVEPOINT)|YEAR(_MONTH)?|T(R(IGGER(S)?|U(NCATE|E)|A(NSACTION|ILING))|H(EN|AN)|YPE|I(ME(STAMP(DIFF|ADD)?)?|NY(BLOB|TEXT|INT))|O|E(RMINATED{s}BY|XT|MP(TABLE|ORARY))|ABLE(S(PACE)?|_NAME)?)|ZEROFILL|U(S(ING|E(R(_RESOURCES)?|_FRM)?|AGE)|N(SIGNED|COMMITTED|TIL|I(NSTALL({s}PLUGIN)?|CODE|ON|QUE)|D(O(_BUFFER_SIZE|FILE)?|EFINED)|KNOWN|LOCK)|TC_(TIME(STAMP)?|DATE)|P(GRADE|DATE))|V(IEW|A(R(BINARY|YING|CHAR(ACTER)?|IABLES)|LUE(S)?))|W(R(ITE|APPER)|H(ILE|E(RE|N))|ITH({s}PARSER)?|ORK|EEK|A(RNINGS|IT))) diff --git a/Source/SPSQLTokenizer.l b/Source/SPSQLTokenizer.l index f33d373d..5b08f312 100644 --- a/Source/SPSQLTokenizer.l +++ b/Source/SPSQLTokenizer.l @@ -50,7 +50,7 @@ size_t yyuoffset, yyuleng; s [ \t\n\r] dkey "delimiter" scol ";" -dval [!-゚] +dval [!-\x7E\x80-\xEF] compstart "begin"{s} compend {s}"end" %x comment -- cgit v1.2.3