Initial commit

First public commit
author: Filipp Lepalaan <filipp@mac.com> 2015-08-04 10:11:24 +0300
committer: Filipp Lepalaan <filipp@mac.com> 2015-08-04 10:11:24 +0300
commit: 63b0fc6269b38edf7234b9f151b80d81f614c0a3 (patch)
tree: 555de3068f33f8dddb4619349bbea7d9b7c822fd /servo/lib/shorturl.py
download: Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.gz
Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.bz2
Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.zip
1 files changed, 130 insertions, 0 deletions
diff --git a/servo/lib/shorturl.py b/servo/lib/shorturl.py
new file mode 100644
index 0000000..f93028f
--- /dev/null
+++ b/servo/lib/shorturl.py
@@ -0,0 +1,130 @@
+'''
+Short URL Generator
+===================
+
+Python implementation for generating Tiny URL- and bit.ly-like URLs.
+
+A bit-shuffling approach is used to avoid generating consecutive, predictable 
+URLs.  However, the algorithm is deterministic and will guarantee that no 
+collisions will occur.
+
+The URL alphabet is fully customizable and may contain any number of 
+characters.  By default, digits and lower-case letters are used, with 
+some removed to avoid confusion between characters like o, O and 0.  The 
+default alphabet is shuffled and has a prime number of characters to further 
+improve the results of the algorithm.
+
+The block size specifies how many bits will be shuffled.  The lower BLOCK_SIZE 
+bits are reversed.  Any bits higher than BLOCK_SIZE will remain as is.
+BLOCK_SIZE of 0 will leave all bits unaffected and the algorithm will simply 
+be converting your integer to a different base.
+
+The intended use is that incrementing, consecutive integers will be used as 
+keys to generate the short URLs.  For example, when creating a new URL, the 
+unique integer ID assigned by a database could be used to generate the URL 
+by using this module.  Or a simple counter may be used.  As long as the same 
+integer is not used twice, the same short URL will not be generated twice.
+
+The module supports both encoding and decoding of URLs. The min_length 
+parameter allows you to pad the URL if you want it to be a specific length.
+
+Sample Usage:
+
+>>> import short_url
+>>> url = short_url.encode_url(12)
+>>> print url
+LhKA
+>>> key = short_url.decode_url(url)
+>>> print key
+12
+
+Use the functions in the top-level of the module to use the default encoder. 
+Otherwise, you may create your own UrlEncoder object and use its encode_url 
+and decode_url methods.
+
+Author: Michael Fogleman
+License: MIT
+Link: http://code.activestate.com/recipes/576918/
+'''
+
+MIN_LENGTH = 4
+DEFAULT_BLOCK_SIZE = 24
+DEFAULT_ALPHABET = 'mn6j2c4rvbpygw9z7hsdaetxuk3fq'
+
+class UrlEncoder(object):
+    def __init__(self, alphabet=DEFAULT_ALPHABET, block_size=DEFAULT_BLOCK_SIZE):
+        self.alphabet = alphabet
+        self.block_size = block_size
+        self.mask = (1 << block_size) - 1
+        self.mapping = range(block_size)
+        self.mapping.reverse()
+    def encode_url(self, n, min_length=MIN_LENGTH):
+        return self.enbase(self.encode(n), min_length)
+    def decode_url(self, n):
+        return self.decode(self.debase(n))
+    def encode(self, n):
+        return (n & ~self.mask) | self._encode(n & self.mask)
+    def _encode(self, n):
+        result = 0
+        for i, b in enumerate(self.mapping):
+            if n & (1 << i):
+                result |= (1 << b)
+        return result
+    def decode(self, n):
+        return (n & ~self.mask) | self._decode(n & self.mask)
+    def _decode(self, n):
+        result = 0
+        for i, b in enumerate(self.mapping):
+            if n & (1 << b):
+                result |= (1 << i)
+        return result
+    def enbase(self, x, min_length=MIN_LENGTH):
+        result = self._enbase(x)
+        padding = self.alphabet[0] * (min_length - len(result))
+        return '%s%s' % (padding, result)
+    def _enbase(self, x):
+        n = len(self.alphabet)
+        if x < n:
+            return self.alphabet[x]
+        return self._enbase(x / n) + self.alphabet[x % n]
+    def debase(self, x):
+        n = len(self.alphabet)
+        result = 0
+        for i, c in enumerate(reversed(x)):
+            result += self.alphabet.index(c) * (n ** i)
+        return result
+
+DEFAULT_ENCODER = UrlEncoder()
+
+def encode(n):
+    return DEFAULT_ENCODER.encode(n)
+
+def decode(n):
+    return DEFAULT_ENCODER.decode(n)
+
+def enbase(n, min_length=MIN_LENGTH):
+    return DEFAULT_ENCODER.enbase(n, min_length)
+
+def debase(n):
+    return DEFAULT_ENCODER.debase(n)
+
+def encode_url(n, min_length=MIN_LENGTH):
+    return DEFAULT_ENCODER.encode_url(n, min_length)
+
+def decode_url(n):
+    return DEFAULT_ENCODER.decode_url(n)
+
+def from_time():
+    from time import time
+    return encode_url(int(time()*1000)).upper()
+
+if __name__ == '__main__':
+    for a in range(0, 200000, 37):
+        b = encode(a)
+        c = enbase(b)
+        d = debase(c)
+        e = decode(d)
+        assert a == e
+        assert b == d
+        c = (' ' * (7 - len(c))) + c
+        print '%6d %12d %s %12d %6d' % (a, b, c, d, e)
author	Filipp Lepalaan <filipp@mac.com>	2015-08-04 10:11:24 +0300
committer	Filipp Lepalaan <filipp@mac.com>	2015-08-04 10:11:24 +0300
commit	63b0fc6269b38edf7234b9f151b80d81f614c0a3 (patch)
tree	555de3068f33f8dddb4619349bbea7d9b7c822fd /servo/lib/shorturl.py
download	Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.gz Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.tar.bz2 Servo-63b0fc6269b38edf7234b9f151b80d81f614c0a3.zip