aboutsummaryrefslogtreecommitdiffstats
path: root/tpblite/models/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'tpblite/models/utils.py')
-rw-r--r--tpblite/models/utils.py118
1 files changed, 118 insertions, 0 deletions
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py
new file mode 100644
index 0000000..4aebc0d
--- /dev/null
+++ b/tpblite/models/utils.py
@@ -0,0 +1,118 @@
+import random
+from urllib.request import Request, urlopen
+
+# Delete these when finished rewriting URL
+from collections import OrderedDict
+from purl import URL as PURL
+# ==============================
+
+class Query(object):
+ '''
+ Query object capable of getting html response given
+ a search query and other parameters.
+ '''
+ def __init__(self, query, base_url='https://tpb.party', page=0, order=99, category=0):
+ self.base_url = base_url
+ self.base_path = '/search'
+ self.url = URL(base_url, self.base_path,
+ segments=['query', 'page', 'order', 'category'],
+ defaults=[query, str(page), str(order), str(category)],
+ )
+ self.webpage = self._sendRequest()
+
+ def _sendRequest(self):
+ req = Request(self.url, headers=headers())
+ return urlopen(req).read()
+
+
+### REWRITE THEN DELETE THESE
+
+def URL(base, path, segments=None, defaults=None):
+ """
+ URL segment handler capable of getting and setting segments by name. The
+ URL is constructed by joining base, path and segments.
+
+ For each segment a property capable of getting and setting that segment is
+ created dynamically.
+ """
+ # Make a copy of the Segments class
+ url_class = type(Segments.__name__, Segments.__bases__,
+ dict(Segments.__dict__))
+ segments = [] if segments is None else segments
+ defaults = [] if defaults is None else defaults
+ # For each segment attach a property capable of getting and setting it
+ for segment in segments:
+ setattr(url_class, segment, url_class._segment(segment))
+ # Instantiate the class with the actual parameters
+ return url_class(base, path, segments, defaults)
+
+
+class Segments(object):
+
+ """
+ URL segment handler, not intended for direct use. The URL is constructed by
+ joining base, path and segments.
+ """
+
+ def __init__(self, base, path, segments, defaults):
+ # Preserve the base URL
+ self.base = PURL(base, path=path)
+ # Map the segments and defaults lists to an ordered dict
+ self.segments = OrderedDict(zip(segments, defaults))
+
+ def build(self):
+ # Join base segments and segments
+ segments = self.base.path_segments() + tuple(self.segments.values())
+ # Create a new URL with the segments replaced
+ url = self.base.path_segments(segments)
+ return url
+
+ def __str__(self):
+ return self.build().as_string()
+
+ def _get_segment(self, segment):
+ return self.segments[segment]
+
+ def _set_segment(self, segment, value):
+ self.segments[segment] = value
+
+ @classmethod
+ def _segment(cls, segment):
+ """
+ Returns a property capable of setting and getting a segment.
+ """
+ return property(
+ fget=lambda x: cls._get_segment(x, segment),
+ fset=lambda x, v: cls._set_segment(x, segment, v),
+ )
+
+
+def headers():
+ """
+ The Pirate Bay blocks requests (403 Forbidden)
+ basing on User-Agent header, so it's probably better to rotate them.
+ User-Agents taken from:
+ https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
+ """
+ return {
+ "User-Agent": random.choice(USER_AGENTS),
+ "origin_req_host": "thepiratebay.se",
+ }
+
+
+USER_AGENTS = (
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/60.0.3112.113 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/60.0.3112.101 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/60.0.3112.113 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/60.0.3112.113 Safari/537.36',
+)
+
+### ==================== \ No newline at end of file