replace beautifulsoup4 with lxml and remove purl dependency

author: Loic Coyle <loic.coyle@hotmail.fr> 2020-08-22 20:47:18 +0200
committer: Loic Coyle <loic.coyle@hotmail.fr> 2020-08-22 22:11:26 +0200
commit: 526f38208dd35e57cd419de6f4248f4cfde1faca (patch)
tree: 4c37994a0d511aafaa40ee93547d04c88094ca43 /tpblite/models/torrents.py
parent: e9e1c637b95f609a3053c243f2f5837d5214f3c0 (diff)
download: tpb-lite-526f38208dd35e57cd419de6f4248f4cfde1faca.tar.gz tpb-lite-526f38208dd35e57cd419de6f4248f4cfde1faca.tar.bz2 tpb-lite-526f38208dd35e57cd419de6f4248f4cfde1faca.zip
1 files changed, 12 insertions, 15 deletions
diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py
index 5145289..4eb5a16 100644
--- a/tpblite/models/torrents.py
+++ b/tpblite/models/torrents.py
@@ -1,6 +1,5 @@
-import re
 import unicodedata
-from bs4 import BeautifulSoup
+from lxml.etree import HTML
 
 # TODO: write better comments
 
@@ -44,19 +43,18 @@ class Torrent:
         return "<Torrent object: {}>".format(self.title)
 
     def _getTitle(self):
-        return self.html_row.find("a", class_="detLink").string
+        return self.html_row.findtext('.//a[@class="detLink"]')
 
     def _getMagnetLink(self):
-        tag = self.html_row.find("a", href=(re.compile("magnet")))
-        link = tag.get("href")
-        return link
+        return self.html_row.xpath('.//a[starts-with(@href, "magnet")]/@href')[0]
 
     def _getPeers(self):
-        taglist = self.html_row.find_all("td", align="right")
-        return int(taglist[0].string), int(taglist[1].string)
+        taglist = self.html_row.xpath('.//td[@align="right"]/text()')
+        return int(taglist[0]), int(taglist[1])
 
     def _getFileInfo(self):
-        text = self.html_row.find("font", class_="detDesc").get_text()
+        text = self.html_row.xpath('.//font[@class="detDesc"]/descendant::text()')
+        text = ''.join(text)
         t = text.split(",")
         uptime = unicodedata.normalize("NFKD", t[0].replace("Uploaded ", "").strip())
         size = unicodedata.normalize("NFKD", t[1].replace("Size ", "").strip())
@@ -65,7 +63,7 @@ class Torrent:
         return uptime, size, byte_size, uploader
 
     def _getUrl(self):
-        tag = self.html_row.find("a", class_="detLink")
+        tag = self.html_row.find('.//a[@class="detLink"]')
         return tag.get("href")
 
 
@@ -96,14 +94,13 @@ class Torrents:
         return self.list[index]
 
     def _createTorrentList(self):
-        soup = BeautifulSoup(self.html_source, features="html.parser")
-        if soup.body is None:
+        root = HTML(self.html_source)
+        if root.find("body") is None:
             raise ConnectionError("Could not determine torrents (empty html body)")
-        rows = soup.body.find_all("tr")
+        rows = root.xpath('//tr[td[@class="vertTh"]]')
         torrents = []
         for row in rows:
-            if len(row.find_all("td", {"class": "vertTh"})) == 1:
-                torrents.append(Torrent(row))
+            torrents.append(Torrent(row))
         return torrents
 
     def getBestTorrent(self, min_seeds=30, min_filesize="1 GiB", max_filesize="4 GiB"):
author	Loic Coyle <loic.coyle@hotmail.fr>	2020-08-22 20:47:18 +0200
committer	Loic Coyle <loic.coyle@hotmail.fr>	2020-08-22 22:11:26 +0200
commit	526f38208dd35e57cd419de6f4248f4cfde1faca (patch)
tree	4c37994a0d511aafaa40ee93547d04c88094ca43 /tpblite/models/torrents.py
parent	e9e1c637b95f609a3053c243f2f5837d5214f3c0 (diff)
download	tpb-lite-526f38208dd35e57cd419de6f4248f4cfde1faca.tar.gz tpb-lite-526f38208dd35e57cd419de6f4248f4cfde1faca.tar.bz2 tpb-lite-526f38208dd35e57cd419de6f4248f4cfde1faca.zip