about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Matt Lyon <matthewlyon18@gmail.com> 2019-12-16 11:47:01 +0000
committer Matt Lyon <matthewlyon18@gmail.com> 2019-12-16 11:47:01 +0000
commit aa666a65a3c61fce8b103594cc432aaca2644b8e (patch)
tree c513995f9b55829e2aba14ddccb2efcaf2a16224
parent 669edd3ea3ded084c03775818710e0eef6e6d92d (diff)
download tpb-lite-aa666a65a3c61fce8b103594cc432aaca2644b8e.tar.gz
tpb-lite-aa666a65a3c61fce8b103594cc432aaca2644b8e.tar.bz2
tpb-lite-aa666a65a3c61fce8b103594cc432aaca2644b8e.zip
removed property tags, improved torrent row finding function
-rw-r--r-- .gitignore 4
-rw-r--r-- setup.py 2
-rw-r--r-- tpblite/models/torrents.py 54
-rw-r--r-- tpblite/models/utils.py 2
-rw-r--r-- tpblite/tpblite.py 24
5 files changed, 61 insertions, 25 deletions
diff --git a/.gitignore b/.gitignore
index 85a04a7..d1cf5d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,3 +109,7 @@ venv.bak/
# Eclipse
*.project
*.pydevproject
+
+# VSCode
+*.vscode
+*.code-workspace
diff --git a/setup.py b/setup.py
index cedcd10..9c98f92 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
from setuptools import setup, find_packages
setup(name='tpblite',
- version='0.1.2',
+ version='0.1.3',
description='The Unofficial Pirate Bay Lightweight Python API',
author='Matt Lyon',
author_email='matthewlyon18@gmail.com',
diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py
index 677d8ba..87a3b6b 100644
--- a/tpblite/models/torrents.py
+++ b/tpblite/models/torrents.py
@@ -2,9 +2,8 @@ import re
import unicodedata
from bs4 import BeautifulSoup
-from .utils import Query
-
#TODO: implement a pretty print for Torrents object
+#TODO: write better comments
def fileSizeStrToInt(size_str):
'''Converts file size given in *iB format to bytes integer'''
@@ -27,10 +26,10 @@ class Torrent(object):
'''
def __init__(self, html_row):
self.html_row = html_row
- self.__title = None
- self.__magnetlink = None
+ self.title = self._getTitle()
self.seeds, self.leeches = self._getPeers()
- self.uploaded, self.filesize, self.byte_size, self.uploader = self._getFileInfo()
+ self.upload_date, self.filesize, self.byte_size, self.uploader = self._getFileInfo()
+ self.magnetlink = self._getMagnetLink()
def __str__(self):
return '{0}, S: {1}, L: {2}, {3}'.format(self.title,
@@ -40,21 +39,14 @@ class Torrent(object):
def __repr__(self):
return '<Torrent object: {}>'.format(self.title)
-
- #TODO: finish implementing @property tags
- @property
- def title(self):
- if self.__title == None:
- self.__title = self.html_row.find('a', class_='detLink').string
- return self.__title
+ def _getTitle(self):
+ return self.html_row.find('a', class_='detLink').string
- @property
- def magnetlink(self):
- if self.__magnetlink == None:
- tag = self.html_row.find('a', href=(re.compile('magnet')))
- self.__magnetlink = tag.get('href')
- return self.__magnetlink
+ def _getMagnetLink(self):
+ tag = self.html_row.find('a', href=(re.compile('magnet')))
+ link = tag.get('href')
+ return link
def _getPeers(self):
taglist = self.html_row.find_all('td', align='right')
@@ -76,9 +68,12 @@ class Torrents(object):
torrent list or dict. Has methods to select items from
torrent list.
'''
- def __init__(self, html_source):
+ def __init__(self, search_str, html_source):
+ self.search_str = search_str
+ self.__search_set = None
+
self.html_source = html_source
- self.list = self._createTorrentList()
+ self.list = self._createTorrentListMod()
def __str__(self):
return 'Torrents object: {} torrents'.format(len(self.list))
@@ -91,6 +86,12 @@ class Torrents(object):
def __len__(self):
return len(self.list)
+
+ @property
+ def search_set(self):
+ if self.__search_set == None:
+ self.__search_set = set(filter(None, re.split(r'[\s.|\(|\)]',self.search_str.lower())))
+ return self.__search_set
def _createTorrentList(self):
soup = BeautifulSoup(self.html_source, features='html.parser')
@@ -99,6 +100,18 @@ class Torrents(object):
for row in rows:
torrents.append(Torrent(row))
return torrents
+
+ def _createTorrentListMod(self):
+ soup = BeautifulSoup(self.html_source, features='html.parser')
+ rows = soup.body.find_all('tr')
+ torrents = []
+ for row in rows:
+ # Get the lowercase unique set from the row text
+ text_set = set(filter(None, re.split(r'[\s.|\(|\)]',row.text.lower())))
+ # Check if search string is subset
+ if self.search_set.issubset(text_set):
+ torrents.append(Torrent(row))
+ return torrents
def __getRows(self, soup):
rows = soup.body.find_all('tr')
@@ -111,6 +124,7 @@ class Torrents(object):
return rows
else:
return []
+
def getBestTorrent(self, min_seeds=30, min_filesize='1 GiB', max_filesize='4 GiB'):
'''Filters torrent list based on some constraints, then returns highest seeded torrent
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py
index 310d9fd..14c002f 100644
--- a/tpblite/models/utils.py
+++ b/tpblite/models/utils.py
@@ -3,7 +3,7 @@ from urllib.request import Request, urlopen
from purl import URL as pURL
-class Query(object):
+class QueryParser(object):
'''
Query object capable of getting html response given
a search query and other parameters.
diff --git a/tpblite/tpblite.py b/tpblite/tpblite.py
index 93c812b..a2a927d 100644
--- a/tpblite/tpblite.py
+++ b/tpblite/tpblite.py
@@ -1,9 +1,19 @@
from .models.torrents import Torrents, Torrent
-from .models.utils import Query
+from .models.utils import QueryParser
class TPB(object):
def __init__(self, base_url='https://tpb.party'):
+ '''ThePirateBay Object
+
+ Args:
+ base_url (str): PirateBay URL to use for queries
+
+ Attributes:
+ search_url (str): This is the compiled search string used
+ to query the PirateBay URL, modified when calling search
+ method
+ '''
self.base_url = base_url
self.search_url = None
@@ -11,6 +21,14 @@ class TPB(object):
return 'TPB Object, base URL: {}'.format(self.base_url)
def search(self, query, page=0, order=99, category=0):
- q = Query(query, self.base_url, page, order, category)
+ '''Search ThePirateBay and return list of Torrents
+
+ Args:
+ query (str): Search string to query ThePirateBay
+ page (int): page number to grab results from
+ order TODO
+ category TODO
+ '''
+ q = QueryParser(query, self.base_url, page, order, category)
self.search_url = q.url
- return Torrents(q.html_source)
\ No newline at end of file
+ return Torrents(query, q.html_source)
\ No newline at end of file