diff options
author | JPFrancoia <jeanpatrick.francoia@gmail.com> | 2019-12-24 17:46:08 +0100 |
---|---|---|
committer | JPFrancoia <jeanpatrick.francoia@gmail.com> | 2019-12-24 17:46:08 +0100 |
commit | 4f00535cc83f57aac4b2a420907497e15f7c2f35 (patch) | |
tree | 55a6b724f0721e7bed340f25827851e854e09f8a /tpblite/models/utils.py | |
parent | c45b6ca3e82a5d10e14c31c4b7d0fdaf66fff933 (diff) | |
download | tpb-lite-4f00535cc83f57aac4b2a420907497e15f7c2f35.tar.gz tpb-lite-4f00535cc83f57aac4b2a420907497e15f7c2f35.tar.bz2 tpb-lite-4f00535cc83f57aac4b2a420907497e15f7c2f35.zip |
Adding a browse() method to the TPB object, to get torrents by category,
without any query.
Also ran Black + cleaned some docstrings + added some type annotations.
Diffstat (limited to 'tpblite/models/utils.py')
-rw-r--r-- | tpblite/models/utils.py | 73 |
1 file changed, 48 insertions, 25 deletions
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index 1d6b351..eb24f8c 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -1,40 +1,63 @@ +from typing import Tuple, Type import random from urllib.request import Request, urlopen import urllib.error from purl import URL as pURL -class QueryParser(object): - ''' - Query object capable of getting html response given - a search query and other parameters. - ''' - def __init__(self, query, base_url, page, order, category): +class QueryParser: + """Query object capable of getting html response given a search query and other + parameters. + """ + + # PirateBay URL to use for queries + base_url: str + + # Compiled search string used to query the PirateBay URL + url: str + + def __init__(self, base_url: str, segments: Tuple[str, ...]): self.base_url = base_url - segments = ('search', query, str(page), str(order), str(category)) self.url = URL(base_url, segments) try: self.html_source = self._sendRequest() except urllib.error.URLError: - raise ConnectionError('Could not establish connection wtih {}'.format(self.base_url)) - + raise ConnectionError( + "Could not establish connection wtih {}".format(self.base_url) + ) + + @classmethod + def from_search( + cls, query: str, base_url: str, page: int, order: int, category: int + ): + segments = ("search", query, str(page), str(order), str(category)) + return cls(base_url, segments) + + @classmethod + def from_browse(cls, base_url: str, category: int, page: int, order: int): + print("browsing") + segments = ("browse", str(category), str(page), str(order), "0") + + return cls(base_url, segments) + def _sendRequest(self): req = Request(self.url, headers=headers()) return urlopen(req).read() -def URL(base, segments): + +def URL(base: str, segments: Tuple[str, ...]) -> str: u = pURL().from_string(base) url = u.path_segments(segments) return url.as_string() def headers(): - ''' + """ The Pirate Bay blocks requests (403 Forbidden) basing on User-Agent 
header, so it's probably better to rotate them. User-Agents taken from: https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ - ''' + """ return { "User-Agent": random.choice(USER_AGENTS), "origin_req_host": "thepiratebay.se", @@ -42,16 +65,16 @@ def headers(): USER_AGENTS = ( - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.113 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.101 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.113 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.113 Safari/537.36', -)
\ No newline at end of file + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.113 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.101 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.113 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.113 Safari/537.36", +) |