diff options
author | Max <post@wickenrode.com> | 2015-01-27 01:55:18 +0100 |
---|---|---|
committer | Max <post@wickenrode.com> | 2015-01-27 01:55:18 +0100 |
commit | cf2559d98a00699462bc32f3e19753a9ca547a9c (patch) | |
tree | cbd04ba88f6bf759ebbc9a387bdfdaca5fd139c4 /Source/SPEditorTokens.l | |
parent | d7de5dc07fbfa52af7020416bbf42747ed905577 (diff) | |
download | sequelpro-cf2559d98a00699462bc32f3e19753a9ca547a9c.tar.gz sequelpro-cf2559d98a00699462bc32f3e19753a9ca547a9c.tar.bz2 sequelpro-cf2559d98a00699462bc32f3e19753a9ca547a9c.zip |
Move some duplicate code into its own file
Diffstat (limited to 'Source/SPEditorTokens.l')
-rw-r--r-- | Source/SPEditorTokens.l | 55 |
1 files changed, 1 insertions, 54 deletions
diff --git a/Source/SPEditorTokens.l b/Source/SPEditorTokens.l index c10dbd60..a40105a1 100644 --- a/Source/SPEditorTokens.l +++ b/Source/SPEditorTokens.l @@ -43,8 +43,8 @@ */ #import "SPEditorTokens.h" +#include "SPParserUtils.h" -size_t utf8strlen(const char * _s); size_t yyuoffset, yyuleng; //keep track of the current utf-8 character (not byte) offset and token length @@ -126,59 +126,6 @@ keywords (X(OR|509|A)|S(MALLINT|SL|H(OW({s}(E(NGINE(S)?|RRORS)|M(ASTER|UTEX)|BIN } %% -#define ONEMASK ((size_t)(-1) / 0xFF) -// adapted from http://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html -size_t utf8strlen(const char * _s) -{ - const char * s; - size_t count = 0; - size_t u; - unsigned char b; - - /* Handle any initial misaligned bytes. */ - for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) { - b = *s; - - /* Exit if we hit a zero byte. */ - if (b == '\0') - goto done; - - /* Is this byte NOT the first byte of a character? */ - count += (b >> 7) & ((~b) >> 6); - } - - /* Handle complete blocks. */ - for (; ; s += sizeof(size_t)) { - /* Prefetch 256 bytes ahead. */ - __builtin_prefetch(&s[256], 0, 0); - - /* Grab 4 or 8 bytes of UTF-8 data. */ - u = *(size_t *)(s); - - /* Exit the loop if there are any zero bytes. */ - if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80)) - break; - - /* Count bytes which are NOT the first byte of a character. */ - u = ((u & (ONEMASK * 0x80)) >> 7) & ((~u) >> 6); - count += (u * ONEMASK) >> ((sizeof(size_t) - 1) * 8); - } - - /* Take care of any left-over bytes. */ - for (; ; s++) { - b = *s; - - /* Exit if we hit a zero byte. */ - if (b == '\0') - break; - - /* Is this byte NOT the first byte of a character? */ - count += (b >> 7) & ((~b) >> 6); - } - -done: - return ((s - _s) - count); -} /* un-optimized keywords: ACCESSIBLE |