author     Matt <32886639+mattlyon93@users.noreply.github.com>    2020-09-09 20:07:59 +1000
committer  GitHub <noreply@github.com>                            2020-09-09 20:07:59 +1000
commit     1ad2946a5ff0902c8a028b7caec67037b8e61ce3 (patch)
tree       deec38a8c79849421bc95f7c035006d60713e838
parent     09261d2adcd708d643fe7e1331f5c840ec207c41 (diff)
parent     526f38208dd35e57cd419de6f4248f4cfde1faca (diff)
Merge pull request #5 from loiccoyle/cleanup
tpb-ultra-lite
-rw-r--r--   README.md                    |  3
-rw-r--r--   setup.py                     |  4
-rw-r--r--   tpblite/models/torrents.py   | 27
-rw-r--r--   tpblite/models/utils.py      | 10
4 files changed, 19 insertions, 25 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -9,8 +9,7 @@ $ pip install tpblite
 ```

 Dependencies:
- - BeautifulSoup
- - purl
+ - lxml

 Usage
 ==========
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -21,9 +21,7 @@ setup(name = 'tpblite',
       long_description_content_type='text/markdown',
       license = 'MIT License',
       packages = ['tpblite', 'tpblite/models'],
-      install_requires = [
-          'beautifulsoup4',
-          'purl'],
+      install_requires = ['lxml'],
       classifiers = [
           'Development Status :: 3 - Alpha',
           'Programming Language :: Python',
diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py
index 2a9bffb..1c2b32d 100644
--- a/tpblite/models/torrents.py
+++ b/tpblite/models/torrents.py
@@ -1,6 +1,5 @@
-import re
 import unicodedata

-from bs4 import BeautifulSoup
+from lxml.etree import HTML

 # TODO: write better comments
@@ -47,19 +46,18 @@ class Torrent:
         return "<Torrent object: {}>".format(self.title)

     def _getTitle(self):
-        return self.html_row.find("a", class_="detLink").string
+        return self.html_row.findtext('.//a[@class="detLink"]')

     def _getMagnetLink(self):
-        tag = self.html_row.find("a", href=(re.compile("magnet")))
-        link = tag.get("href")
-        return link
+        return self.html_row.xpath('.//a[starts-with(@href, "magnet")]/@href')[0]

     def _getPeers(self):
-        taglist = self.html_row.find_all("td", align="right")
-        return int(taglist[0].string), int(taglist[1].string)
+        taglist = self.html_row.xpath('.//td[@align="right"]/text()')
+        return int(taglist[0]), int(taglist[1])

     def _getFileInfo(self):
-        text = self.html_row.find("font", class_="detDesc").get_text()
+        text = self.html_row.xpath('.//font[@class="detDesc"]/descendant::text()')
+        text = ''.join(text)
         t = text.split(",")
         uptime = unicodedata.normalize("NFKD", t[0].replace("Uploaded ", "").strip())
         size = unicodedata.normalize("NFKD", t[1].replace("Size ", "").strip())
@@ -68,7 +66,7 @@ class Torrent:
         return uptime, size, byte_size, uploader

     def _getUrl(self):
-        tag = self.html_row.find("a", class_="detLink")
+        tag = self.html_row.find('.//a[@class="detLink"]')
         return tag.get("href")


@@ -99,14 +97,13 @@ class Torrents:
         return self.list[index]

     def _createTorrentList(self):
-        soup = BeautifulSoup(self.html_source, features="html.parser")
-        if soup.body is None:
+        root = HTML(self.html_source)
+        if root.find("body") is None:
             raise ConnectionError("Could not determine torrents (empty html body)")
-        rows = soup.body.find_all("tr")
+        rows = root.xpath('//tr[td[@class="vertTh"]]')
         torrents = []
         for row in rows:
-            if len(row.find_all("td", {"class": "vertTh"})) == 1:
-                torrents.append(Torrent(row))
+            torrents.append(Torrent(row))
         return torrents

     def getBestTorrent(self, min_seeds=30, min_filesize="1 GiB", max_filesize="4 GiB"):
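For context on the parsing change above, here is a minimal, self-contained sketch of the lxml.etree calls that replace the BeautifulSoup ones in torrents.py. The HTML snippet is invented for illustration and only mimics the row structure the parser looks for; it is not real Pirate Bay markup.

# Standalone sketch of the lxml API used in the new torrents.py.
# The sample markup below is made up; only the class/align attributes
# mirror what the parser expects.
from lxml.etree import HTML

sample = '''
<table>
  <tr>
    <td class="vertTh">category</td>
    <td>
      <a class="detLink" href="/torrent/1">Some Title</a>
      <a href="magnet:?xt=urn:btih:abc">magnet</a>
    </td>
    <td align="right">42</td>
    <td align="right">7</td>
  </tr>
</table>
'''

root = HTML(sample)                                  # parse string into an element tree
rows = root.xpath('//tr[td[@class="vertTh"]]')       # keep only torrent-like rows
row = rows[0]
title = row.findtext('.//a[@class="detLink"]')       # text of the detLink anchor
magnet = row.xpath('.//a[starts-with(@href, "magnet")]/@href')[0]
seeds, leeches = [int(t) for t in row.xpath('.//td[@align="right"]/text()')]
print(title, magnet, seeds, leeches)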
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py
index b9126e9..c5b5cfd 100644
--- a/tpblite/models/utils.py
+++ b/tpblite/models/utils.py
@@ -1,8 +1,8 @@
 from typing import Tuple, Type, TypeVar
 import random
 from urllib.request import Request, urlopen
+from urllib.parse import urlparse, urlunparse, quote
 import urllib.error
-from purl import URL as pURL

 # https://github.com/python/typing/issues/58#issuecomment-326240794
 T = TypeVar("T", bound="QueryParser")
@@ -26,7 +26,7 @@ class QueryParser:
             self.html_source = self._sendRequest()
         except urllib.error.URLError:
             raise ConnectionError(
-                "Could not establish connection wtih {}".format(self.base_url)
+                "Could not establish connection with {}".format(self.url)
             )

     @classmethod
@@ -58,9 +58,9 @@ class QueryParser:


 def URL(base: str, segments: Tuple[str, ...]) -> str:
-    u = pURL().from_string(base)
-    url = u.path_segments(segments)
-    return url.as_string()
+    url = list(urlparse(base))
+    url[2] = '/'.join((quote(s) for s in segments))
+    return urlunparse(url)


 def headers():
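The purl replacement in utils.py boils down to the standard-library pattern sketched below. The base URL and segments here are arbitrary examples, not the exact values tpblite uses.

# Sketch of building a URL from path segments with urllib.parse,
# mirroring the new URL() helper above (example inputs are arbitrary).
from urllib.parse import urlparse, urlunparse, quote

def build_url(base, segments):
    parts = list(urlparse(base))                       # [scheme, netloc, path, params, query, fragment]
    parts[2] = '/'.join(quote(s) for s in segments)    # swap in a percent-encoded path
    return urlunparse(parts)

print(build_url('https://example.org', ('search', 'ubuntu 20.04', '0', '99', '0')))
# https://example.org/search/ubuntu%2020.04/0/99/0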