aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Matt Lyon <matthewlyon18@gmail.com> 2019-10-30 15:27:45 +1100
committer: Matt Lyon <matthewlyon18@gmail.com> 2019-10-30 15:27:45 +1100
commit: 1b3b031e4b15f947c539ae76fc892874de03c4be (patch)
tree: bfe1df7b3919154eb176a92bf48c23c3e21379d8
parent: 2801af89bf72389ce089aac92dda1bb22797ecc1 (diff)
download: tpb-lite-1b3b031e4b15f947c539ae76fc892874de03c4be.tar.gz
tpb-lite-1b3b031e4b15f947c539ae76fc892874de03c4be.tar.bz2
tpb-lite-1b3b031e4b15f947c539ae76fc892874de03c4be.zip
initial commit
-rw-r--r-- .gitignore 7
-rw-r--r-- README.md 0
-rw-r--r-- tpblite/__init__.py 0
-rw-r--r-- tpblite/models/__init__.py 0
-rw-r--r-- tpblite/models/torrents.py 103
-rw-r--r-- tpblite/models/utils.py 118
-rw-r--r-- tpblite/tpblite.py 11
7 files changed, 239 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 894a44c..85a04a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,10 @@ venv.bak/
# mypy
.mypy_cache/
+
+# macOS
+*.DS_Store
+
+# Eclipse
+*.project
+*.pydevproject
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/README.md
diff --git a/tpblite/__init__.py b/tpblite/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tpblite/__init__.py
diff --git a/tpblite/models/__init__.py b/tpblite/models/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tpblite/models/__init__.py
diff --git a/tpblite/models/torrents.py b/tpblite/models/torrents.py
new file mode 100644
index 0000000..8567280
--- /dev/null
+++ b/tpblite/models/torrents.py
@@ -0,0 +1,103 @@
+import re
+from bs4 import BeautifulSoup
+
+
def fileSizeStrToInt(size_str):
    '''
    Convert a human-readable file size into a whole number of bytes.

    The text is a number followed by a unit suffix: one of the binary
    units ``KiB``, ``MiB``, ``GiB``, ``TiB``, or a plain ``B`` for bytes
    (for example ``'1.5 GiB'`` or ``'512 B'``). Whitespace around the
    text and between the number and the unit is ignored.

    Args:
        size_str: Size text such as ``'700 MiB'``.

    Returns:
        The size in bytes as an ``int`` (fractional bytes are truncated).

    Raises:
        AttributeError: If the text cannot be interpreted as a size. The
            underlying error is attached as ``__cause__``.
    '''
    # 'B' comes last: every *iB suffix also ends in 'B', so the longer
    # suffixes must be tried first.
    units = (
        ('KiB', 2**10),
        ('MiB', 2**20),
        ('GiB', 2**30),
        ('TiB', 2**40),
        ('B', 1),
    )
    try:
        text = size_str.strip()
        for suffix, multiplier in units:
            if text.endswith(suffix):
                # float() tolerates whitespace left between number and unit.
                return int(float(text[:-len(suffix)]) * multiplier)
        raise ValueError('unrecognised size unit')
    except Exception as e:
        # AttributeError is kept as the raised type for existing callers.
        raise AttributeError(
            'Cannot determine filesize: {0}, error: {1}'.format(size_str, e)
        ) from e
+
class Torrent(object):
    '''
    Details of one torrent, extracted from a single result-table row:
    title, seed and leech counts, upload info, file size and magnet link.
    '''

    def __init__(self, html_row):
        # html_row is queried with find()/find_all() by the helpers below.
        self.html_row = html_row
        self.title = self._getTitle()
        self.seeds, self.leeches = self._getPeers()
        file_info = self._getFileInfo()
        self.uploaded, self.filesize, self.byte_size, self.uploader = file_info
        self.filesize_int = fileSizeStrToInt(self.filesize)
        self.magnetlink = self._getMagnetLink()

    def __str__(self):
        summary = (self.title, self.seeds, self.leeches, self.filesize)
        return '{0}, S: {1}, L: {2}, {3}'.format(*summary)

    def _getTitle(self):
        '''Return the string of the row's ``detLink`` anchor.'''
        anchor = self.html_row.find('a', class_='detLink')
        return anchor.string

    def _getPeers(self):
        '''Return ``(seeds, leeches)`` from the first two right-aligned cells.'''
        cells = self.html_row.find_all('td', align='right')
        seeds = int(cells[0].string)
        leeches = int(cells[1].string)
        return seeds, leeches

    def _getFileInfo(self):
        '''
        Split the ``detDesc`` text on commas and return
        ``(uploaded, size, byte_size, uploader)``.
        '''
        description = self.html_row.find('font', class_='detDesc').get_text()
        fields = description.split(',')
        uploaded = fields[0].replace('Uploaded ', '')
        size_text = fields[1].replace('Size ', '')
        size_bytes = fileSizeStrToInt(size_text)
        uploader = fields[2].replace('ULed by ', '').strip()
        return uploaded, size_text, size_bytes, uploader

    def _getMagnetLink(self):
        '''Return the href of the first anchor whose href matches "magnet".'''
        anchor = self.html_row.find('a', href=(re.compile('magnet')))
        return anchor.get('href')
+
class Torrents(object):
    '''
    Collection of Torrent objects parsed from a search-result page.

    The page's table rows are each turned into a Torrent and stored in
    ``self.list``. The collection can be iterated, and getBestTorrent()
    selects the best-seeded entry inside given limits.
    '''

    def __init__(self, webpage):
        '''
        Args:
            webpage: Markup of the result page, handed to BeautifulSoup.
        '''
        self.webpage = webpage
        self.list = self._createTorrentList()

    def __str__(self):
        return 'Torrents Object: {0} torrents'.format(len(self.list))

    def __iter__(self):
        return iter(self.list)

    def _createTorrentList(self):
        '''Parse the page and build one Torrent per usable table row.'''
        soup = BeautifulSoup(self.webpage, features='html.parser')
        return [Torrent(row) for row in self.__getRows(soup)]

    def __getRows(self, soup):
        '''
        Return every ``<tr>`` in the page body except the first and last.

        NOTE(review): the first and last rows are presumably the table
        header and a pagination row -- confirm against the live markup.
        '''
        rows = soup.body.find_all('tr')
        # Slicing (instead of two ``del`` statements) yields an empty list
        # rather than IndexError when the page has fewer than two rows.
        return rows[1:-1]

    def getBestTorrent(self, min_seeds=30, min_filesize='1 GiB', max_filesize='4 GiB'):
        '''
        Return the torrent with the most seeds that satisfies the limits.

        Args:
            min_seeds: Minimum number of seeds required.
            min_filesize: Lower size bound, either a number of bytes or
                size text such as ``'1 GiB'``.
            max_filesize: Upper size bound, in the same forms.

        Returns:
            The matching Torrent with the highest seed count; on a tie,
            the one that appears first in ``self.list``.

        Raises:
            IndexError: If no torrent satisfies the limits.
            AttributeError: If a textual size limit cannot be parsed.
        '''
        # Only text needs converting; numeric limits are used as given.
        if isinstance(min_filesize, str):
            min_filesize = fileSizeStrToInt(min_filesize)
        if isinstance(max_filesize, str):
            max_filesize = fileSizeStrToInt(max_filesize)
        candidates = [
            torrent for torrent in self.list
            if self._filterTorrent(torrent, min_seeds, min_filesize, max_filesize)
        ]
        if not candidates:
            raise IndexError('No torrent matches the given seed and filesize limits')
        # max() returns the first maximal item, matching a stable
        # descending sort followed by taking element 0.
        return max(candidates, key=lambda torrent: torrent.seeds)

    def _filterTorrent(self, torrent, min_seeds, min_filesize, max_filesize):
        '''Return True when the torrent meets the seed and size limits.'''
        return (torrent.seeds >= min_seeds
                and min_filesize <= torrent.filesize_int <= max_filesize)
diff --git a/tpblite/models/utils.py b/tpblite/models/utils.py
new file mode 100644
index 0000000..4aebc0d
--- /dev/null
+++ b/tpblite/models/utils.py
@@ -0,0 +1,118 @@
+import random
+from urllib.request import Request, urlopen
+
+# Delete these when finished rewriting URL
+from collections import OrderedDict
+from purl import URL as PURL
+# ==============================
+
class Query(object):
    """
    Search request that fetches the result page as soon as it is created.

    The search URL is assembled from ``base_url``, the ``/search`` path and
    the segments query / page / order / category, and the response body is
    stored in ``self.webpage``.

    Args:
        query: Search text.
        base_url: Scheme and host of the site to query.
        page: Result page number; stringified into the URL.
        order: Sort-order code; stringified into the URL.
        category: Category code; stringified into the URL.
    """
    def __init__(self, query, base_url='https://tpb.party', page=0, order=99, category=0):
        self.base_url = base_url
        self.base_path = '/search'
        self.url = URL(
            base_url,
            self.base_path,
            segments=['query', 'page', 'order', 'category'],
            defaults=[query, str(page), str(order), str(category)],
        )
        # Performs the HTTP request immediately.
        self.webpage = self._sendRequest()

    def _sendRequest(self):
        """
        Request ``self.url`` with the headers from headers() and return the
        raw response body.
        """
        # self.url is a segment-handler object; its str() is the full URL.
        req = Request(str(self.url), headers=headers())
        # The context manager closes the response once it has been read.
        with urlopen(req) as response:
            return response.read()
+
+
+### REWRITE THEN DELETE THESE
+
def URL(base, path, segments=None, defaults=None):
    """
    Build a URL handler whose trailing path segments are addressable by name.

    The resulting URL is base + path + the segment values. Each name in
    ``segments`` becomes a read/write property on the returned object, and
    ``defaults`` supplies the initial values in the same order.
    """
    if segments is None:
        segments = []
    if defaults is None:
        defaults = []
    # Work on a fresh copy of Segments so the properties attached below
    # are set on the copy rather than on Segments itself.
    handler_cls = type(
        Segments.__name__,
        Segments.__bases__,
        dict(Segments.__dict__),
    )
    for name in segments:
        setattr(handler_cls, name, handler_cls._segment(name))
    return handler_cls(base, path, segments, defaults)
+
+
class Segments(object):

    """
    Named URL segment store; meant to be created through URL() rather than
    directly. The final URL is base + path + the stored segment values.
    """

    def __init__(self, base, path, segments, defaults):
        # Base URL (with its path) is kept untouched for later rebuilding.
        self.base = PURL(base, path=path)
        # Segment name -> value, in the order the names were given.
        self.segments = OrderedDict(zip(segments, defaults))

    def build(self):
        # Base path segments followed by the current segment values.
        combined = self.base.path_segments() + tuple(self.segments.values())
        # Hand the combined segments back to the base URL object.
        return self.base.path_segments(combined)

    def __str__(self):
        built = self.build()
        return built.as_string()

    def _get_segment(self, segment):
        return self.segments[segment]

    def _set_segment(self, segment, value):
        self.segments[segment] = value

    @classmethod
    def _segment(cls, segment):
        """
        Create a property that reads and writes the named segment.
        """
        def read(instance):
            return cls._get_segment(instance, segment)

        def write(instance, value):
            return cls._set_segment(instance, segment, value)

        return property(fget=read, fset=write)
+
+
def headers():
    """
    Build the request headers, picking a random User-Agent on every call.

    The Pirate Bay rejects requests (403 Forbidden) based on the
    User-Agent header, so rotating between several is probably safer.
    User-Agents taken from:
    https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
    """
    agent = random.choice(USER_AGENTS)
    request_headers = {}
    request_headers["User-Agent"] = agent
    request_headers["origin_req_host"] = "thepiratebay.se"
    return request_headers


# Pool of browser User-Agent strings that headers() draws from.
USER_AGENTS = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/60.0.3112.113 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/60.0.3112.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/60.0.3112.113 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/60.0.3112.113 Safari/537.36',
)
+
+### ==================== \ No newline at end of file
diff --git a/tpblite/tpblite.py b/tpblite/tpblite.py
new file mode 100644
index 0000000..c987e49
--- /dev/null
+++ b/tpblite/tpblite.py
@@ -0,0 +1,11 @@
+from .models.torrents import Torrents, Torrent
+from .models.utils import Query
+
+
+
def run():
    '''
    Demo: search for a fixed title, print the search URL, then print the
    magnet link of the best torrent found with the default limits.
    '''
    search = Query('avengers endgame 1080p')
    print(search.url)
    results = Torrents(search.webpage)
    best = results.getBestTorrent()
    print(best.magnetlink)