From ae9e8c258acb277eb10220c0ff3a803091efa923 Mon Sep 17 00:00:00 2001 From: Matt Lyon Date: Sat, 2 Nov 2019 17:06:18 +1100 Subject: added utils and constants --- tpblite/models/utils.py | 83 ++++++------------------------------------------- 1 file changed, 10 insertions(+), 73 deletions(-) (limited to 'tpblite/models/utils.py') diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index 4aebc0d..72409b8 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -1,10 +1,7 @@ import random from urllib.request import Request, urlopen +from purl import URL as pURL -# Delete these when finished rewriting URL -from collections import OrderedDict -from purl import URL as PURL -# ============================== class Query(object): ''' @@ -13,87 +10,27 @@ class Query(object): ''' def __init__(self, query, base_url='https://tpb.party', page=0, order=99, category=0): self.base_url = base_url - self.base_path = '/search' - self.url = URL(base_url, self.base_path, - segments=['query', 'page', 'order', 'category'], - defaults=[query, str(page), str(order), str(category)], - ) + segments = ('search', query, str(page), str(order), str(category)) + self.url = URL(base_url, segments) self.webpage = self._sendRequest() - + def _sendRequest(self): req = Request(self.url, headers=headers()) return urlopen(req).read() - -### REWRITE THEN DELETE THESE - -def URL(base, path, segments=None, defaults=None): - """ - URL segment handler capable of getting and setting segments by name. The - URL is constructed by joining base, path and segments. - - For each segment a property capable of getting and setting that segment is - created dynamically. - """ - # Make a copy of the Segments class - url_class = type(Segments.__name__, Segments.__bases__, - dict(Segments.__dict__)) - segments = [] if segments is None else segments - defaults = [] if defaults is None else defaults - # For each segment attach a property capable of getting and setting it - for segment in segments: - setattr(url_class, segment, url_class._segment(segment)) - # Instantiate the class with the actual parameters - return url_class(base, path, segments, defaults) - - -class Segments(object): - - """ - URL segment handler, not intended for direct use. The URL is constructed by - joining base, path and segments. - """ - - def __init__(self, base, path, segments, defaults): - # Preserve the base URL - self.base = PURL(base, path=path) - # Map the segments and defaults lists to an ordered dict - self.segments = OrderedDict(zip(segments, defaults)) - - def build(self): - # Join base segments and segments - segments = self.base.path_segments() + tuple(self.segments.values()) - # Create a new URL with the segments replaced - url = self.base.path_segments(segments) - return url - - def __str__(self): - return self.build().as_string() - - def _get_segment(self, segment): - return self.segments[segment] - - def _set_segment(self, segment, value): - self.segments[segment] = value - - @classmethod - def _segment(cls, segment): - """ - Returns a property capable of setting and getting a segment. - """ - return property( - fget=lambda x: cls._get_segment(x, segment), - fset=lambda x, v: cls._set_segment(x, segment, v), - ) +def URL(base, segments): + u = pURL().from_string(base) + url = u.path_segments(segments) + return url.as_string() def headers(): - """ + ''' The Pirate Bay blocks requests (403 Forbidden) basing on User-Agent header, so it's probably better to rotate them. User-Agents taken from: https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ - """ + ''' return { "User-Agent": random.choice(USER_AGENTS), "origin_req_host": "thepiratebay.se", -- cgit v1.2.3