author     Matt <32886639+mattlyon93@users.noreply.github.com>    2020-09-09 20:07:59 +1000
committer  GitHub <noreply@github.com>                            2020-09-09 20:07:59 +1000
commit     1ad2946a5ff0902c8a028b7caec67037b8e61ce3 (patch)
tree       deec38a8c79849421bc95f7c035006d60713e838
parent     09261d2adcd708d643fe7e1331f5c840ec207c41 (diff)
parent     526f38208dd35e57cd419de6f4248f4cfde1faca (diff)
Merge pull request #5 from loiccoyle/cleanup
tpb-ultra-lite
-rw-r--r--   README.md                    |  3
-rw-r--r--   setup.py                     |  4
-rw-r--r--   tpblite/models/torrents.py   | 27
-rw-r--r--   tpblite/models/utils.py      | 10
4 files changed, 19 insertions, 25 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -9,8 +9,7 @@ $ pip install tpblite
 ```

 Dependencies:
- - BeautifulSoup
- - purl
+ - lxml

 Usage
 ==========
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -21,9 +21,7 @@ setup(name = 'tpblite',
       long_description_content_type='text/markdown',
       license = 'MIT License',
       packages = ['tpblite', 'tpblite/models'],
-      install_requires = [
-          'beautifulsoup4',
-          'purl'],
+      install_requires = ['lxml'],
       classifiers = [
           'Development Status :: 3 - Alpha',
           'Programming Language :: Python',
diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py
index 2a9bffb..1c2b32d 100644
--- a/tpblite/models/torrents.py
+++ b/tpblite/models/torrents.py
@@ -1,6 +1,5 @@
-import re
 import unicodedata

-from bs4 import BeautifulSoup
+from lxml.etree import HTML

 # TODO: write better comments
@@ -47,19 +46,18 @@ class Torrent:
         return "<Torrent object: {}>".format(self.title)

     def _getTitle(self):
-        return self.html_row.find("a", class_="detLink").string
+        return self.html_row.findtext('.//a[@class="detLink"]')

     def _getMagnetLink(self):
-        tag = self.html_row.find("a", href=(re.compile("magnet")))
-        link = tag.get("href")
-        return link
+        return self.html_row.xpath('.//a[starts-with(@href, "magnet")]/@href')[0]

     def _getPeers(self):
-        taglist = self.html_row.find_all("td", align="right")
-        return int(taglist[0].string), int(taglist[1].string)
+        taglist = self.html_row.xpath('.//td[@align="right"]/text()')
+        return int(taglist[0]), int(taglist[1])

     def _getFileInfo(self):
-        text = self.html_row.find("font", class_="detDesc").get_text()
+        text = self.html_row.xpath('.//font[@class="detDesc"]/descendant::text()')
+        text = ''.join(text)
         t = text.split(",")
         uptime = unicodedata.normalize("NFKD", t[0].replace("Uploaded ", "").strip())
         size = unicodedata.normalize("NFKD", t[1].replace("Size ", "").strip())
@@ -68,7 +66,7 @@ class Torrent:
         return uptime, size, byte_size, uploader

     def _getUrl(self):
-        tag = self.html_row.find("a", class_="detLink")
+        tag = self.html_row.find('.//a[@class="detLink"]')
         return tag.get("href")


@@ -99,14 +97,13 @@ class Torrents:
         return self.list[index]

     def _createTorrentList(self):
-        soup = BeautifulSoup(self.html_source, features="html.parser")
-        if soup.body is None:
+        root = HTML(self.html_source)
+        if root.find("body") is None:
             raise ConnectionError("Could not determine torrents (empty html body)")
-        rows = soup.body.find_all("tr")
+        rows = root.xpath('//tr[td[@class="vertTh"]]')
         torrents = []
         for row in rows:
-            if len(row.find_all("td", {"class": "vertTh"})) == 1:
-                torrents.append(Torrent(row))
+            torrents.append(Torrent(row))
         return torrents

     def getBestTorrent(self, min_seeds=30, min_filesize="1 GiB", max_filesize="4 GiB"):
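For context on the parsing change above, here is a minimal, self-contained sketch of the lxml.etree calls that replace the BeautifulSoup ones in torrents.py. The HTML snippet is invented for illustration and only mimics the row structure the parser looks for; it is not real Pirate Bay markup.

# Standalone sketch of the lxml API used in the new torrents.py.
# The sample markup below is made up; only the class/align attributes
# mirror what the parser expects.
from lxml.etree import HTML

sample = '''
<table>
  <tr>
    <td class="vertTh">category</td>
    <td>
      <a class="detLink" href="/torrent/1">Some Title</a>
      <a href="magnet:?xt=urn:btih:abc">magnet</a>
    </td>
    <td align="right">42</td>
    <td align="right">7</td>
  </tr>
</table>
'''

root = HTML(sample)                                  # parse string into an element tree
rows = root.xpath('//tr[td[@class="vertTh"]]')       # keep only torrent-like rows
row = rows[0]
title = row.findtext('.//a[@class="detLink"]')       # text of the detLink anchor
magnet = row.xpath('.//a[starts-with(@href, "magnet")]/@href')[0]
seeds, leeches = [int(t) for t in row.xpath('.//td[@align="right"]/text()')]
print(title, magnet, seeds, leeches)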
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py
index b9126e9..c5b5cfd 100644
--- a/tpblite/models/utils.py
+++ b/tpblite/models/utils.py
@@ -1,8 +1,8 @@
 from typing import Tuple, Type, TypeVar
 import random
 from urllib.request import Request, urlopen
+from urllib.parse import urlparse, urlunparse, quote
 import urllib.error
-from purl import URL as pURL

 # https://github.com/python/typing/issues/58#issuecomment-326240794
 T = TypeVar("T", bound="QueryParser")
@@ -26,7 +26,7 @@ class QueryParser:
             self.html_source = self._sendRequest()
         except urllib.error.URLError:
             raise ConnectionError(
-                "Could not establish connection wtih {}".format(self.base_url)
+                "Could not establish connection with {}".format(self.url)
             )

     @classmethod
@@ -58,9 +58,9 @@ class QueryParser:


 def URL(base: str, segments: Tuple[str, ...]) -> str:
-    u = pURL().from_string(base)
-    url = u.path_segments(segments)
-    return url.as_string()
+    url = list(urlparse(base))
+    url[2] = '/'.join((quote(s) for s in segments))
+    return urlunparse(url)


 def headers():
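The purl replacement in utils.py boils down to the standard-library pattern sketched below. The base URL and segments here are arbitrary examples, not the exact values tpblite uses.

# Sketch of building a URL from path segments with urllib.parse,
# mirroring the new URL() helper above (example inputs are arbitrary).
from urllib.parse import urlparse, urlunparse, quote

def build_url(base, segments):
    parts = list(urlparse(base))                       # [scheme, netloc, path, params, query, fragment]
    parts[2] = '/'.join(quote(s) for s in segments)    # swap in a percent-encoded path
    return urlunparse(parts)

print(build_url('https://example.org', ('search', 'ubuntu 20.04', '0', '99', '0')))
# https://example.org/search/ubuntu%2020.04/0/99/0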