From 4f00535cc83f57aac4b2a420907497e15f7c2f35 Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Tue, 24 Dec 2019 17:46:08 +0100 Subject: Adding a browse() method to the TPB object, to get torrents by category, without any query. Also ran Black + cleaned some docstrings + added some type annotations. --- tpblite/models/torrents.py | 135 ++++++++++++++++++++++----------------------- tpblite/models/utils.py | 73 +++++++++++++++--------- tpblite/tpblite.py | 70 ++++++++++++++++------- 3 files changed, 165 insertions(+), 113 deletions(-) (limited to 'tpblite') diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py index 2d8bc6a..ada282f 100644 --- a/tpblite/models/torrents.py +++ b/tpblite/models/torrents.py @@ -2,133 +2,132 @@ import re import unicodedata from bs4 import BeautifulSoup -#TODO: write better comments +# TODO: write better comments + def fileSizeStrToInt(size_str): - '''Converts file size given in *iB format to bytes integer''' - - unit_dict = {'KiB':(2**10), - 'MiB':(2**20), - 'GiB':(2**30), - 'TiB':(2**40)} + """Converts file size given in *iB format to bytes integer""" + + unit_dict = {"KiB": (2 ** 10), "MiB": (2 ** 20), "GiB": (2 ** 30), "TiB": (2 ** 40)} try: num = float(size_str[:-3]) unit = size_str[-3:] return int(num * unit_dict[unit]) except Exception as e: - raise AttributeError('Cannot determine filesize: {0}, error: {1}'.format(size_str,e)) - -class Torrent(object): - ''' + raise AttributeError( + "Cannot determine filesize: {0}, error: {1}".format(size_str, e) + ) + + +class Torrent: + """ Abstract class to contain info about torrent magnet link, file size, number of seeds, number of leeches etc. - ''' + """ + def __init__(self, html_row): self.html_row = html_row self.title = self._getTitle() self.seeds, self.leeches = self._getPeers() - self.upload_date, self.filesize, self.byte_size, self.uploader = self._getFileInfo() + self.upload_date, self.filesize, self.byte_size, self.uploader = ( + self._getFileInfo() + ) self.magnetlink = self._getMagnetLink() - + def __str__(self): - return '{0}, S: {1}, L: {2}, {3}'.format(self.title, - self.seeds, - self.leeches, - self.filesize) - + return "{0}, S: {1}, L: {2}, {3}".format( + self.title, self.seeds, self.leeches, self.filesize + ) + def __repr__(self): - return ''.format(self.title) + return "".format(self.title) def _getTitle(self): - return self.html_row.find('a', class_='detLink').string + return self.html_row.find("a", class_="detLink").string def _getMagnetLink(self): - tag = self.html_row.find('a', href=(re.compile('magnet'))) - link = tag.get('href') + tag = self.html_row.find("a", href=(re.compile("magnet"))) + link = tag.get("href") return link - + def _getPeers(self): - taglist = self.html_row.find_all('td', align='right') + taglist = self.html_row.find_all("td", align="right") return int(taglist[0].string), int(taglist[1].string) - + def _getFileInfo(self): - text = self.html_row.find('font', class_='detDesc').get_text() - t = text.split(',') - uptime = unicodedata.normalize('NFKD', t[0].replace('Uploaded ','').strip()) - size = unicodedata.normalize('NFKD', t[1].replace('Size ', '').strip()) + text = self.html_row.find("font", class_="detDesc").get_text() + t = text.split(",") + uptime = unicodedata.normalize("NFKD", t[0].replace("Uploaded ", "").strip()) + size = unicodedata.normalize("NFKD", t[1].replace("Size ", "").strip()) byte_size = fileSizeStrToInt(size) - uploader = unicodedata.normalize('NFKD', t[2].replace('ULed by ', '').strip()) + uploader = unicodedata.normalize("NFKD", 
t[2].replace("ULed by ", "").strip()) return uptime, size, byte_size, uploader - - -class Torrents(object): - ''' + + +class Torrents: + """ Torrent object, takes query response and parses into torrent list or dict. Has methods to select items from torrent list. - ''' - def __init__(self, search_str, html_source): - self.search_str = search_str - self.__search_set = None - + """ + + def __init__(self, html_source): self.html_source = html_source self.list = self._createTorrentList() - + def __str__(self): - return 'Torrents object: {} torrents'.format(len(self.list)) - + return "Torrents object: {} torrents".format(len(self.list)) + def __repr__(self): - return ''.format(len(self.list)) - + return "".format(len(self.list)) + def __iter__(self): return iter(self.list) def __len__(self): return len(self.list) - def __getitem__(self,index): + def __getitem__(self, index): return self.list[index] - @property - def _search_set(self): - if self.__search_set is None: - self.__search_set = set(filter(None, re.split(r'[\s.|\(|\)]',self.search_str.lower()))) - return self.__search_set - def _createTorrentList(self): - soup = BeautifulSoup(self.html_source, features='html.parser') + soup = BeautifulSoup(self.html_source, features="html.parser") if soup.body is None: - raise ConnectionError('Could not determine torrents (empty html body)') - rows = soup.body.find_all('tr') + raise ConnectionError("Could not determine torrents (empty html body)") + rows = soup.body.find_all("tr") torrents = [] for row in rows: - # Get the lowercase unique set from the row text - text_set = set(filter(None, re.split(r'[\s.|\(|\)]',row.text.lower()))) - # Check if search string is subset - if self._search_set.issubset(text_set): + if len(row.find_all("td", {"class": "vertTh"})) == 1: torrents.append(Torrent(row)) return torrents - - def getBestTorrent(self, min_seeds=30, min_filesize='1 GiB', max_filesize='4 GiB'): - '''Filters torrent list based on some constraints, then returns highest seeded torrent + + def getBestTorrent(self, min_seeds=30, min_filesize="1 GiB", max_filesize="4 GiB"): + """Filters torrent list based on some constraints, then returns highest seeded torrent :param min_seeds (int): minimum seed number filter :param min_filesize (str): minimum filesize in XiB form, eg. GiB :param max_filesize (str): maximum filesize in XiB form, eg. 
GiB - :return Torrent Object: Torrent with highest seed number, will return None if all are filtered out''' + :return Torrent Object: Torrent with highest seed number, will return None if all are filtered out""" if not isinstance(min_filesize, int): min_filesize = fileSizeStrToInt(min_filesize) if not isinstance(max_filesize, int): max_filesize = fileSizeStrToInt(max_filesize) - filtered_list = filter(lambda x: self._filterTorrent(x, min_seeds, min_filesize, max_filesize), self.list) + filtered_list = filter( + lambda x: self._filterTorrent(x, min_seeds, min_filesize, max_filesize), + self.list, + ) sorted_list = sorted(filtered_list, key=lambda x: x.seeds, reverse=True) if len(sorted_list) > 0: return sorted_list[0] else: - print('No torrents found given criteria') + print("No torrents found given criteria") return None - + def _filterTorrent(self, torrent, min_seeds, min_filesize, max_filesize): - if (torrent.seeds < min_seeds) or (torrent.byte_size < min_filesize) or (torrent.byte_size > max_filesize): + if ( + (torrent.seeds < min_seeds) + or (torrent.byte_size < min_filesize) + or (torrent.byte_size > max_filesize) + ): return False else: - return True \ No newline at end of file + return True diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index 1d6b351..eb24f8c 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -1,40 +1,63 @@ +from typing import Tuple, Type import random from urllib.request import Request, urlopen import urllib.error from purl import URL as pURL -class QueryParser(object): - ''' - Query object capable of getting html response given - a search query and other parameters. - ''' - def __init__(self, query, base_url, page, order, category): +class QueryParser: + """Query object capable of getting html response given a search query and other + parameters. + """ + + # PirateBay URL to use for queries + base_url: str + + # Compiled search string used to query the PirateBay URL + url: str + + def __init__(self, base_url: str, segments: Tuple[str, ...]): self.base_url = base_url - segments = ('search', query, str(page), str(order), str(category)) self.url = URL(base_url, segments) try: self.html_source = self._sendRequest() except urllib.error.URLError: - raise ConnectionError('Could not establish connection wtih {}'.format(self.base_url)) - + raise ConnectionError( + "Could not establish connection wtih {}".format(self.base_url) + ) + + @classmethod + def from_search( + cls, query: str, base_url: str, page: int, order: int, category: int + ): + segments = ("search", query, str(page), str(order), str(category)) + return cls(base_url, segments) + + @classmethod + def from_browse(cls, base_url: str, category: int, page: int, order: int): + print("browsing") + segments = ("browse", str(category), str(page), str(order), "0") + + return cls(base_url, segments) + def _sendRequest(self): req = Request(self.url, headers=headers()) return urlopen(req).read() -def URL(base, segments): + +def URL(base: str, segments: Tuple[str, ...]) -> str: u = pURL().from_string(base) url = u.path_segments(segments) return url.as_string() def headers(): - ''' + """ The Pirate Bay blocks requests (403 Forbidden) basing on User-Agent header, so it's probably better to rotate them. 
User-Agents taken from: https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ - ''' + """ return { "User-Agent": random.choice(USER_AGENTS), "origin_req_host": "thepiratebay.se", @@ -42,16 +65,16 @@ def headers(): USER_AGENTS = ( - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.113 Safari/537.36', - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.101 Safari/537.36', - 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.113 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) ' - 'AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/60.0.3112.113 Safari/537.36', -) \ No newline at end of file + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.113 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.101 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.113 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/60.0.3112.113 Safari/537.36", +) diff --git a/tpblite/tpblite.py b/tpblite/tpblite.py index a2a927d..9d153ac 100644 --- a/tpblite/tpblite.py +++ b/tpblite/tpblite.py @@ -1,34 +1,64 @@ +from typing import Optional + from .models.torrents import Torrents, Torrent from .models.utils import QueryParser -class TPB(object): - - def __init__(self, base_url='https://tpb.party'): - '''ThePirateBay Object + +class TPB: + + # PirateBay URL to use for queries + base_url: str + + # Compiled search string used to query the PirateBay URL + search_url: Optional[str] + + def __init__(self, base_url="https://tpb.party"): + """ThePirateBay Object Args: base_url (str): PirateBay URL to use for queries - Attributes: - search_url (str): This is the compiled search string used - to query the PirateBay URL, modified when calling search - method - ''' + """ self.base_url = base_url self.search_url = None - - def __str__(self): - return 'TPB Object, base URL: {}'.format(self.base_url) - - def search(self, query, page=0, order=99, category=0): - '''Search ThePirateBay and retturn list of Torrents + + def __str__(self) -> str: + return "TPB Object, base URL: {}".format(self.base_url) + + def search( + self, query: str, page: int = 0, order: int = 99, category: int = 0 + ) -> Torrent: + """Search ThePirateBay and return list of Torrents Args: - query (str): Search string to query ThePirateBay - page (int): page number to grab results from + query: Search string to query ThePirateBay + page: page number to grab results from order TODO category TODO - ''' - q = QueryParser(query, self.base_url, page, order, category) + + Return: + Torrent + + """ + q = QueryParser.from_search(query, self.base_url, page, order, category) + self.search_url = q.url + return Torrents(q.html_source) + + def browse( + self, category: int = 0, page: int = 0, order: int = 99 + ) -> Torrent: + """Browse ThePirateBay and return list of Torrents + + Args: + query: Search string to query ThePirateBay + page: page number to grab results from + order TODO + category TODO + + Return: + Torrent + + """ + q = QueryParser.from_browse(self.base_url, category, page, order) self.search_url = q.url - return Torrents(query, q.html_source) \ No newline at end of file + return 
Torrents(q.html_source) -- cgit v1.2.3 From ec4f40de81d75d54764ab16915aefd082585ea4a Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 25 Dec 2019 10:47:29 +0100 Subject: Addressing some comments in the PR (mainly code cleaning). --- tpblite/models/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tpblite') diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index eb24f8c..1d5bda6 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -1,4 +1,4 @@ -from typing import Tuple, Type +from typing import Tuple import random from urllib.request import Request, urlopen import urllib.error @@ -35,9 +35,7 @@ class QueryParser: @classmethod def from_browse(cls, base_url: str, category: int, page: int, order: int): - print("browsing") segments = ("browse", str(category), str(page), str(order), "0") - return cls(base_url, segments) def _sendRequest(self): -- cgit v1.2.3 From 3435c5b357e7bd0bfc1ea1720230f534b14a5c15 Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 25 Dec 2019 11:06:01 +0100 Subject: Renaming QueryParser from_browse and from_search methods to browse and search. Adding type hinting for these two methods. --- tpblite/models/utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'tpblite') diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index 1d5bda6..c6b6248 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -1,9 +1,12 @@ -from typing import Tuple +from typing import Tuple, TypeVar import random from urllib.request import Request, urlopen import urllib.error from purl import URL as pURL +# https://github.com/python/typing/issues/58#issuecomment-326240794 +T = TypeVar("T", bound="QueryParser") + class QueryParser: """Query object capable of getting html response given a search query and other @@ -27,14 +30,14 @@ class QueryParser: ) @classmethod - def from_search( + def search( cls, query: str, base_url: str, page: int, order: int, category: int - ): + ) -> T: segments = ("search", query, str(page), str(order), str(category)) return cls(base_url, segments) @classmethod - def from_browse(cls, base_url: str, category: int, page: int, order: int): + def browse(cls, base_url: str, category: int, page: int, order: int) -> T: segments = ("browse", str(category), str(page), str(order), "0") return cls(base_url, segments) -- cgit v1.2.3 From 836fa075264a1d1f43a434c2e134d6bf6cb57943 Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 25 Dec 2019 11:14:21 +0100 Subject: Adding comment explaining the 0 at the end of the browse URL. --- tpblite/models/utils.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tpblite') diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index c6b6248..dd03b55 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -38,6 +38,7 @@ class QueryParser: @classmethod def browse(cls, base_url: str, category: int, page: int, order: int) -> T: + # The 0 is added to the URL to stay consistent with the manual web request segments = ("browse", str(category), str(page), str(order), "0") return cls(base_url, segments) -- cgit v1.2.3 From fdb0167687f1772ff4b2363010e417234061af04 Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 25 Dec 2019 11:54:00 +0100 Subject: search_url is now a private atttribute and was renamed _search_url. 
--- tpblite/tpblite.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'tpblite') diff --git a/tpblite/tpblite.py b/tpblite/tpblite.py index 9d153ac..e4a0c58 100644 --- a/tpblite/tpblite.py +++ b/tpblite/tpblite.py @@ -9,10 +9,7 @@ class TPB: # PirateBay URL to use for queries base_url: str - # Compiled search string used to query the PirateBay URL - search_url: Optional[str] - - def __init__(self, base_url="https://tpb.party"): + def __init__(self, base_url: str = "https://tpb.party"): """ThePirateBay Object Args: @@ -20,7 +17,9 @@ class TPB: """ self.base_url = base_url - self.search_url = None + + # Compiled search string used to query the PirateBay URL + self._search_url: Optional[str] = None def __str__(self) -> str: return "TPB Object, base URL: {}".format(self.base_url) @@ -41,12 +40,10 @@ class TPB: """ q = QueryParser.from_search(query, self.base_url, page, order, category) - self.search_url = q.url + self._search_url = q.url return Torrents(q.html_source) - def browse( - self, category: int = 0, page: int = 0, order: int = 99 - ) -> Torrent: + def browse(self, category: int = 0, page: int = 0, order: int = 99) -> Torrent: """Browse ThePirateBay and return list of Torrents Args: @@ -60,5 +57,5 @@ class TPB: """ q = QueryParser.from_browse(self.base_url, category, page, order) - self.search_url = q.url + self._search_url = q.url return Torrents(q.html_source) -- cgit v1.2.3 From 4418d8c73b26639016733bc0a3264a96046c6ab1 Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 25 Dec 2019 17:11:09 +0100 Subject: Fixing mistakes related to type hinting. --- tpblite/models/utils.py | 6 +++--- tpblite/tpblite.py | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'tpblite') diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py index dd03b55..6c479d4 100644 --- a/tpblite/models/utils.py +++ b/tpblite/models/utils.py @@ -1,4 +1,4 @@ -from typing import Tuple, TypeVar +from typing import Tuple, Type, TypeVar import random from urllib.request import Request, urlopen import urllib.error @@ -31,13 +31,13 @@ class QueryParser: @classmethod def search( - cls, query: str, base_url: str, page: int, order: int, category: int + cls: Type[T], query: str, base_url: str, page: int, order: int, category: int ) -> T: segments = ("search", query, str(page), str(order), str(category)) return cls(base_url, segments) @classmethod - def browse(cls, base_url: str, category: int, page: int, order: int) -> T: + def browse(cls: Type[T], base_url: str, category: int, page: int, order: int) -> T: # The 0 is added to the URL to stay consistent with the manual web request segments = ("browse", str(category), str(page), str(order), "0") return cls(base_url, segments) diff --git a/tpblite/tpblite.py b/tpblite/tpblite.py index e4a0c58..3c68d62 100644 --- a/tpblite/tpblite.py +++ b/tpblite/tpblite.py @@ -6,9 +6,6 @@ from .models.utils import QueryParser class TPB: - # PirateBay URL to use for queries - base_url: str - def __init__(self, base_url: str = "https://tpb.party"): """ThePirateBay Object @@ -16,6 +13,7 @@ class TPB: base_url (str): PirateBay URL to use for queries """ + # PirateBay URL to use for queries self.base_url = base_url # Compiled search string used to query the PirateBay URL @@ -26,7 +24,7 @@ class TPB: def search( self, query: str, page: int = 0, order: int = 99, category: int = 0 - ) -> Torrent: + ) -> Torrents: """Search ThePirateBay and return list of Torrents Args: @@ -36,14 +34,14 @@ class TPB: category TODO Return: - Torrent 
+ Torrents """ - q = QueryParser.from_search(query, self.base_url, page, order, category) + q = QueryParser.search(query, self.base_url, page, order, category) self._search_url = q.url return Torrents(q.html_source) - def browse(self, category: int = 0, page: int = 0, order: int = 99) -> Torrent: + def browse(self, category: int = 0, page: int = 0, order: int = 99) -> Torrents: """Browse ThePirateBay and return list of Torrents Args: @@ -56,6 +54,6 @@ class TPB: Torrent """ - q = QueryParser.from_browse(self.base_url, category, page, order) + q = QueryParser.browse(self.base_url, category, page, order) self._search_url = q.url return Torrents(q.html_source) -- cgit v1.2.3
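
After this series, TPB exposes two entry points that both return a Torrents collection: search() for full-text queries and the new browse() for category listings, with Torrents itself no longer re-filtering rows against a search string. A minimal usage sketch follows; it is illustrative only and assumes that TPB is importable from the package root (from tpblite import TPB), that the default base URL https://tpb.party is reachable, and that category 207 (TPB's HD movies code) is just an example value, since these patches do not define category constants.

    from tpblite import TPB  # assumption: TPB is re-exported from tpblite/__init__.py

    tpb = TPB()  # defaults to base_url="https://tpb.party"

    # Full-text search, as before; returns a Torrents collection.
    results = tpb.search("ubuntu 19.10", page=0, order=99, category=0)

    # New in this series: list a whole category without a query string.
    # 207 is used here as an example category code (HD movies); the patches
    # themselves do not define any category constants.
    listing = tpb.browse(category=207, page=0, order=99)

    for torrent in listing:
        # __str__ prints "title, S: seeds, L: leeches, filesize"
        print(torrent)

    # Pick the highest-seeded torrent within the given size window, or None.
    best = results.getBestTorrent(min_seeds=30, min_filesize="1 GiB", max_filesize="4 GiB")
    if best is not None:
        print(best.magnetlink)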
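The final commit annotates the QueryParser alternate constructors with cls: Type[T] and a return type of T, where T is a TypeVar bound to the class, following the workaround referenced in the linked typing issue. A standalone sketch of that pattern, using hypothetical class names rather than tpblite's:

    from typing import Tuple, Type, TypeVar

    # Forward reference: the bound is given as a string because the class is
    # defined below; type checkers resolve it lazily.
    T = TypeVar("T", bound="BaseQuery")

    class BaseQuery:
        def __init__(self, base_url: str, segments: Tuple[str, ...]):
            self.base_url = base_url
            self.segments = segments

        @classmethod
        def search(cls: Type[T], base_url: str, query: str) -> T:
            # Annotating cls as Type[T] and returning cls(...) lets checkers
            # infer the subclass, not BaseQuery, at the call site.
            return cls(base_url, ("search", query, "0", "99", "0"))

    class LoggingQuery(BaseQuery):
        pass

    q = LoggingQuery.search("https://tpb.party", "ubuntu")
    # mypy infers q as LoggingQuery rather than BaseQuery.

On Python 3.11 and later the same intent is usually written with typing.Self; the TypeVar form shown here is what type checkers supported when these commits were made.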