From 7405de872cd8dd99a94bf9c6e9c20635dfb935c7 Mon Sep 17 00:00:00 2001
From: sharky555
Date: Thu, 24 Dec 2020 18:52:34 -0700
Subject: [PATCH] more initial files

---
 autoupload.py |  21 ++++++++++++++-------
 smpy.py       | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 7 deletions(-)
 create mode 100644 smpy.py

diff --git a/autoupload.py b/autoupload.py
index b3a30aa..4eeac94 100644
--- a/autoupload.py
+++ b/autoupload.py
@@ -46,12 +46,17 @@ def getargs():
 
 # Acquire the authkey used for torrent files from upload.php
 def getauthkey():
-    uploadpage = j.retrieveContent("https://jpopsuki.eu/upload.php")
-    soup = BeautifulSoup(uploadpage.text, 'html5lib')
-    rel2 = str(soup.select('#wrapper #content .thin'))
-    # Regex returns multiple matches, could be optimized.
-    authkey = re.findall("(?<=value=\")(.*)(?=\")", rel2)[0]
-
+    # uploadpage = j.retrieveContent("https://sugoimusic.me/upload.php")
+    # soup = BeautifulSoup(uploadpage.text, 'html5lib')
+    # rel2 = str(soup.select('#wrapper #content .thin'))
+    # # Regex returns multiple matches, could be optimized.
+    # authkey = re.findall("(?<=value=\")(.*)(?=\")", rel2)[0]
+
+    # return authkey
+    smpage = sm.retrieveContent("https://sugoimusic.me/torrents.php?id=118")  # Arbitrary page on SugoiMusic that has an authkey
+    soup = BeautifulSoup(smpage.text, 'html5lib')
+    rel2 = str(soup.select('#content .thin .main_column .torrent_table tbody'))
+    authkey = re.findall('authkey=(.*?)&torrent_pass=', rel2)[0]  # non-greedy match; findall() returns a list, so keep the first hit
     return authkey
 
 
@@ -70,6 +75,7 @@ def createtorrent(authkey, directory, filename, releasedata):
                 trackers=[authkey])  # Torf requires we store authkeys in a list object. This makes it easier to add multiple announce urls.
     # Set torrent to private as standard practice for private trackers
     t.private = True
+    t.source = "SugoiMusic"
     t.generate()
     ## Format releasedata to bring a suitable torrent name.
     # The reason we don't just use the directory name is because of an error in POSTING.
@@ -542,8 +548,9 @@ def detectlanguage(string):
 
 
 def uploadtorrent(torrent, cover, releasedata):
+    languages = ('Japanese', 'English', 'Korean', 'Chinese', 'Vietnamese')
     # POST url.
-    uploadurl = "https://jpopsuki.eu/upload.php"
+    uploadurl = "https://sugoimusic.me/upload.php"
     # Dataset containing all of the information obtained from our FLAC files.
     data = releasedata
 
diff --git a/smpy.py b/smpy.py
new file mode 100644
index 0000000..74ef985
--- /dev/null
+++ b/smpy.py
@@ -0,0 +1,117 @@
+import os
+import pickle
+import datetime
+from urllib.parse import urlparse
+import requests
+
+
+class MyLoginSession:
+    def __init__(self,
+                 loginUrl,
+                 loginData,
+                 loginTestUrl,
+                 loginTestString,
+                 sessionFileAppendix='_session.dat',
+                 maxSessionTimeSeconds=30 * 60,
+                 proxies=None,
+                 userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
+                 debug=False,
+                 forceLogin=False,
+                 **kwargs):
+        """
+        save some information needed to log in to the session
+
+        you'll have to provide 'loginTestString', which will be looked for in the
+        response HTML to make sure you've properly been logged in
+
+        'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ... }
+        'loginData' will be sent as POST data (dictionary of id : value).
+        'maxSessionTimeSeconds' will be used to determine when to re-login.
+ """ + urlData = urlparse(loginUrl) + + self.proxies = proxies + self.loginData = loginData + self.loginUrl = loginUrl + self.loginTestUrl = loginTestUrl + self.maxSessionTime = maxSessionTimeSeconds + self.sessionFile = urlData.netloc + sessionFileAppendix + self.userAgent = userAgent + self.loginTestString = loginTestString + self.debug = debug + + self.login(forceLogin, **kwargs) + + def modification_date(self, filename): + """ + return last file modification date as datetime object + """ + t = os.path.getmtime(filename) + return datetime.datetime.fromtimestamp(t) + + def login(self, forceLogin=False, **kwargs): + """ + login to a session. Try to read last saved session from cache file. If this fails + do proper login. If the last cache access was too old, also perform a proper login. + Always updates session cache file. + """ + wasReadFromCache = False + if self.debug: + print('loading or generating session...') + if os.path.exists(self.sessionFile) and not forceLogin: + time = self.modification_date(self.sessionFile) + + # only load if file less than 30 minutes old + lastModification = (datetime.datetime.now() - time).seconds + if lastModification < self.maxSessionTime: + with open(self.sessionFile, "rb") as f: + self.session = pickle.load(f) + wasReadFromCache = True + if self.debug: + print("loaded session from cache (last access %ds ago) " + % lastModification) + if not wasReadFromCache: + self.session = requests.Session() + self.session.headers.update({'user-agent': self.userAgent}) + res = self.session.post(self.loginUrl, data=self.loginData, + proxies=self.proxies, **kwargs) + + if self.debug: + print('created new session with login') + self.saveSessionToCache() + + # test login + res = self.session.get(self.loginTestUrl) + if res.text.lower().find(self.loginTestString.lower()) < 0: + if self.debug: + print(res.text) + raise Exception("could not log into provided site '%s'" + " (did not find successful login string)" + % self.loginUrl) + + def saveSessionToCache(self): + """ + save session to a cache file + """ + # always save (to update timeout) + with open(self.sessionFile, "wb") as f: + pickle.dump(self.session, f) + if self.debug: + print('updated session cache-file %s' % self.sessionFile) + + def retrieveContent(self, url, method="get", postData=None, postDataFiles=None, **kwargs): + """ + return the content of the url with respect to the session. + + If 'method' is not 'get', the url will be called with 'postData' + as a post request. + """ + if method == 'get': + res = self.session.get(url, proxies=self.proxies, **kwargs) + else: + res = self.session.post(url, data=postData, proxies=self.proxies, files=postDataFiles, **kwargs) + + # the session has been updated on the server, so also update in cache + self.saveSessionToCache() + + return res