aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt <32886639+mattlyon93@users.noreply.github.com>2019-12-25 16:24:25 +0000
committerGitHub <noreply@github.com>2019-12-25 16:24:25 +0000
commitc0b26a45f8bf910de9f594f28003cff7dc9e37a7 (patch)
treec2aa2b2636acd81e78223284e8ad616ad250bb3a
parentc45b6ca3e82a5d10e14c31c4b7d0fdaf66fff933 (diff)
parent4418d8c73b26639016733bc0a3264a96046c6ab1 (diff)
downloadtpb-lite-c0b26a45f8bf910de9f594f28003cff7dc9e37a7.tar.gz
tpb-lite-c0b26a45f8bf910de9f594f28003cff7dc9e37a7.tar.bz2
tpb-lite-c0b26a45f8bf910de9f594f28003cff7dc9e37a7.zip
Merge pull request #2 from JPFrancoia/master
Adding a browse() method to the TPB object, to get torrents by category,
-rw-r--r--tpblite/models/torrents.py135
-rw-r--r--tpblite/models/utils.py75
-rw-r--r--tpblite/tpblite.py69
3 files changed, 164 insertions, 115 deletions
diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py
index 2d8bc6a..ada282f 100644
--- a/tpblite/models/torrents.py
+++ b/tpblite/models/torrents.py
@@ -2,133 +2,132 @@ import re
import unicodedata
from bs4 import BeautifulSoup
-#TODO: write better comments
+# TODO: write better comments
+
def fileSizeStrToInt(size_str):
- '''Converts file size given in *iB format to bytes integer'''
-
- unit_dict = {'KiB':(2**10),
- 'MiB':(2**20),
- 'GiB':(2**30),
- 'TiB':(2**40)}
+ """Converts file size given in *iB format to bytes integer"""
+
+ unit_dict = {"KiB": (2 ** 10), "MiB": (2 ** 20), "GiB": (2 ** 30), "TiB": (2 ** 40)}
try:
num = float(size_str[:-3])
unit = size_str[-3:]
return int(num * unit_dict[unit])
except Exception as e:
- raise AttributeError('Cannot determine filesize: {0}, error: {1}'.format(size_str,e))
-
-class Torrent(object):
- '''
+ raise AttributeError(
+ "Cannot determine filesize: {0}, error: {1}".format(size_str, e)
+ )
+
+
+class Torrent:
+ """
Abstract class to contain info about torrent
magnet link, file size, number of seeds, number of leeches etc.
- '''
+ """
+
def __init__(self, html_row):
self.html_row = html_row
self.title = self._getTitle()
self.seeds, self.leeches = self._getPeers()
- self.upload_date, self.filesize, self.byte_size, self.uploader = self._getFileInfo()
+ self.upload_date, self.filesize, self.byte_size, self.uploader = (
+ self._getFileInfo()
+ )
self.magnetlink = self._getMagnetLink()
-
+
def __str__(self):
- return '{0}, S: {1}, L: {2}, {3}'.format(self.title,
- self.seeds,
- self.leeches,
- self.filesize)
-
+ return "{0}, S: {1}, L: {2}, {3}".format(
+ self.title, self.seeds, self.leeches, self.filesize
+ )
+
def __repr__(self):
- return '<Torrent object: {}>'.format(self.title)
+ return "<Torrent object: {}>".format(self.title)
def _getTitle(self):
- return self.html_row.find('a', class_='detLink').string
+ return self.html_row.find("a", class_="detLink").string
def _getMagnetLink(self):
- tag = self.html_row.find('a', href=(re.compile('magnet')))
- link = tag.get('href')
+ tag = self.html_row.find("a", href=(re.compile("magnet")))
+ link = tag.get("href")
return link
-
+
def _getPeers(self):
- taglist = self.html_row.find_all('td', align='right')
+ taglist = self.html_row.find_all("td", align="right")
return int(taglist[0].string), int(taglist[1].string)
-
+
def _getFileInfo(self):
- text = self.html_row.find('font', class_='detDesc').get_text()
- t = text.split(',')
- uptime = unicodedata.normalize('NFKD', t[0].replace('Uploaded ','').strip())
- size = unicodedata.normalize('NFKD', t[1].replace('Size ', '').strip())
+ text = self.html_row.find("font", class_="detDesc").get_text()
+ t = text.split(",")
+ uptime = unicodedata.normalize("NFKD", t[0].replace("Uploaded ", "").strip())
+ size = unicodedata.normalize("NFKD", t[1].replace("Size ", "").strip())
byte_size = fileSizeStrToInt(size)
- uploader = unicodedata.normalize('NFKD', t[2].replace('ULed by ', '').strip())
+ uploader = unicodedata.normalize("NFKD", t[2].replace("ULed by ", "").strip())
return uptime, size, byte_size, uploader
-
-
-class Torrents(object):
- '''
+
+
+class Torrents:
+ """
Torrent object, takes query response and parses into
torrent list or dict. Has methods to select items from
torrent list.
- '''
- def __init__(self, search_str, html_source):
- self.search_str = search_str
- self.__search_set = None
-
+ """
+
+ def __init__(self, html_source):
self.html_source = html_source
self.list = self._createTorrentList()
-
+
def __str__(self):
- return 'Torrents object: {} torrents'.format(len(self.list))
-
+ return "Torrents object: {} torrents".format(len(self.list))
+
def __repr__(self):
- return '<Torrents object: {} torrents>'.format(len(self.list))
-
+ return "<Torrents object: {} torrents>".format(len(self.list))
+
def __iter__(self):
return iter(self.list)
def __len__(self):
return len(self.list)
- def __getitem__(self,index):
+ def __getitem__(self, index):
return self.list[index]
- @property
- def _search_set(self):
- if self.__search_set is None:
- self.__search_set = set(filter(None, re.split(r'[\s.|\(|\)]',self.search_str.lower())))
- return self.__search_set
-
def _createTorrentList(self):
- soup = BeautifulSoup(self.html_source, features='html.parser')
+ soup = BeautifulSoup(self.html_source, features="html.parser")
if soup.body is None:
- raise ConnectionError('Could not determine torrents (empty html body)')
- rows = soup.body.find_all('tr')
+ raise ConnectionError("Could not determine torrents (empty html body)")
+ rows = soup.body.find_all("tr")
torrents = []
for row in rows:
- # Get the lowercase unique set from the row text
- text_set = set(filter(None, re.split(r'[\s.|\(|\)]',row.text.lower())))
- # Check if search string is subset
- if self._search_set.issubset(text_set):
+ if len(row.find_all("td", {"class": "vertTh"})) == 1:
torrents.append(Torrent(row))
return torrents
-
- def getBestTorrent(self, min_seeds=30, min_filesize='1 GiB', max_filesize='4 GiB'):
- '''Filters torrent list based on some constraints, then returns highest seeded torrent
+
+ def getBestTorrent(self, min_seeds=30, min_filesize="1 GiB", max_filesize="4 GiB"):
+ """Filters torrent list based on some constraints, then returns highest seeded torrent
:param min_seeds (int): minimum seed number filter
:param min_filesize (str): minimum filesize in XiB form, eg. GiB
:param max_filesize (str): maximum filesize in XiB form, eg. GiB
- :return Torrent Object: Torrent with highest seed number, will return None if all are filtered out'''
+ :return Torrent Object: Torrent with highest seed number, will return None if all are filtered out"""
if not isinstance(min_filesize, int):
min_filesize = fileSizeStrToInt(min_filesize)
if not isinstance(max_filesize, int):
max_filesize = fileSizeStrToInt(max_filesize)
- filtered_list = filter(lambda x: self._filterTorrent(x, min_seeds, min_filesize, max_filesize), self.list)
+ filtered_list = filter(
+ lambda x: self._filterTorrent(x, min_seeds, min_filesize, max_filesize),
+ self.list,
+ )
sorted_list = sorted(filtered_list, key=lambda x: x.seeds, reverse=True)
if len(sorted_list) > 0:
return sorted_list[0]
else:
- print('No torrents found given criteria')
+ print("No torrents found given criteria")
return None
-
+
def _filterTorrent(self, torrent, min_seeds, min_filesize, max_filesize):
- if (torrent.seeds < min_seeds) or (torrent.byte_size < min_filesize) or (torrent.byte_size > max_filesize):
+ if (
+ (torrent.seeds < min_seeds)
+ or (torrent.byte_size < min_filesize)
+ or (torrent.byte_size > max_filesize)
+ ):
return False
else:
- return True \ No newline at end of file
+ return True
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py
index 1d6b351..6c479d4 100644
--- a/tpblite/models/utils.py
+++ b/tpblite/models/utils.py
@@ -1,40 +1,65 @@
+from typing import Tuple, Type, TypeVar
import random
from urllib.request import Request, urlopen
import urllib.error
from purl import URL as pURL
+# https://github.com/python/typing/issues/58#issuecomment-326240794
+T = TypeVar("T", bound="QueryParser")
-class QueryParser(object):
- '''
- Query object capable of getting html response given
- a search query and other parameters.
- '''
- def __init__(self, query, base_url, page, order, category):
+
+class QueryParser:
+ """Query object capable of getting html response given a search query and other
+ parameters.
+ """
+
+ # PirateBay URL to use for queries
+ base_url: str
+
+ # Compiled search string used to query the PirateBay URL
+ url: str
+
+ def __init__(self, base_url: str, segments: Tuple[str, ...]):
self.base_url = base_url
- segments = ('search', query, str(page), str(order), str(category))
self.url = URL(base_url, segments)
try:
self.html_source = self._sendRequest()
except urllib.error.URLError:
- raise ConnectionError('Could not establish connection wtih {}'.format(self.base_url))
-
+ raise ConnectionError(
+ "Could not establish connection wtih {}".format(self.base_url)
+ )
+
+ @classmethod
+ def search(
+ cls: Type[T], query: str, base_url: str, page: int, order: int, category: int
+ ) -> T:
+ segments = ("search", query, str(page), str(order), str(category))
+ return cls(base_url, segments)
+
+ @classmethod
+ def browse(cls: Type[T], base_url: str, category: int, page: int, order: int) -> T:
+ # The 0 is added to the URL to stay consistent with the manual web request
+ segments = ("browse", str(category), str(page), str(order), "0")
+ return cls(base_url, segments)
+
def _sendRequest(self):
req = Request(self.url, headers=headers())
return urlopen(req).read()
-def URL(base, segments):
+
+def URL(base: str, segments: Tuple[str, ...]) -> str:
u = pURL().from_string(base)
url = u.path_segments(segments)
return url.as_string()
def headers():
- '''
+ """
The Pirate Bay blocks requests (403 Forbidden)
basing on User-Agent header, so it's probably better to rotate them.
User-Agents taken from:
https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
- '''
+ """
return {
"User-Agent": random.choice(USER_AGENTS),
"origin_req_host": "thepiratebay.se",
@@ -42,16 +67,16 @@ def headers():
USER_AGENTS = (
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
- 'Chrome/60.0.3112.113 Safari/537.36',
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
- 'Chrome/60.0.3112.101 Safari/537.36',
- 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
- 'Chrome/60.0.3112.113 Safari/537.36',
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
- 'Chrome/60.0.3112.113 Safari/537.36',
-) \ No newline at end of file
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/60.0.3112.113 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/60.0.3112.101 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/60.0.3112.113 Safari/537.36",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
+ "Chrome/60.0.3112.113 Safari/537.36",
+)
diff --git a/tpblite/tpblite.py b/tpblite/tpblite.py
index a2a927d..3c68d62 100644
--- a/tpblite/tpblite.py
+++ b/tpblite/tpblite.py
@@ -1,34 +1,59 @@
+from typing import Optional
+
from .models.torrents import Torrents, Torrent
from .models.utils import QueryParser
-class TPB(object):
-
- def __init__(self, base_url='https://tpb.party'):
- '''ThePirateBay Object
+
+class TPB:
+
+ def __init__(self, base_url: str = "https://tpb.party"):
+ """ThePirateBay Object
Args:
base_url (str): PirateBay URL to use for queries
- Attributes:
- search_url (str): This is the compiled search string used
- to query the PirateBay URL, modified when calling search
- method
- '''
+ """
+ # PirateBay URL to use for queries
self.base_url = base_url
- self.search_url = None
-
- def __str__(self):
- return 'TPB Object, base URL: {}'.format(self.base_url)
-
- def search(self, query, page=0, order=99, category=0):
- '''Search ThePirateBay and retturn list of Torrents
+
+ # Compiled search string used to query the PirateBay URL
+ self._search_url: Optional[str] = None
+
+ def __str__(self) -> str:
+ return "TPB Object, base URL: {}".format(self.base_url)
+
+ def search(
+ self, query: str, page: int = 0, order: int = 99, category: int = 0
+ ) -> Torrents:
+ """Search ThePirateBay and return list of Torrents
Args:
- query (str): Search string to query ThePirateBay
- page (int): page number to grab results from
+ query: Search string to query ThePirateBay
+ page: page number to grab results from
order TODO
category TODO
- '''
- q = QueryParser(query, self.base_url, page, order, category)
- self.search_url = q.url
- return Torrents(query, q.html_source) \ No newline at end of file
+
+ Return:
+ Torrents
+
+ """
+ q = QueryParser.search(query, self.base_url, page, order, category)
+ self._search_url = q.url
+ return Torrents(q.html_source)
+
+ def browse(self, category: int = 0, page: int = 0, order: int = 99) -> Torrents:
+ """Browse ThePirateBay and return list of Torrents
+
+ Args:
+ category: category number to browse
+ page: page number to grab results from
+ order TODO
+
+ Return:
+ Torrents
+
+ """
+ q = QueryParser.browse(self.base_url, category, page, order)
+ self._search_url = q.url
+ return Torrents(q.html_source)