import os
import pickle
import datetime
from urllib.parse import urlparse

import requests


class MyLoginSession:
    """
    A `requests` session that logs into a site once and caches itself on disk.

    The pickled session is stored in a file named after the login URL's host
    (plus 'sessionFileAppendix') in the current working directory. The file's
    modification time is treated as the time of last access and decides when
    the cached session is too old to be reused.
    """

    def __init__(self,
                 loginUrl,
                 loginData,
                 loginTestUrl,
                 loginTestString,
                 sessionFileAppendix='_session.dat',
                 maxSessionTimeSeconds=30 * 60,
                 proxies=None,
                 userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
                 debug=False,
                 forceLogin=False,
                 **kwargs):
        """
        save some information needed to login the session and log in right away

        you'll have to provide 'loginTestString' which will be looked for
        (case-insensitively) in the html returned by 'loginTestUrl' to make
        sure you've properly been logged in

        'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ... }
        'loginData' will be sent as post data (dictionary of id : value).
        'maxSessionTimeSeconds' will be used to determine when to re-login.
        'sessionFileAppendix' is appended to the host name to build the cache file name.
        'userAgent' is sent as the user-agent header of a newly created session.
        'debug' enables progress messages on stdout.
        'forceLogin' skips the cache and always performs a proper login.
        Any further keyword arguments are forwarded to the login post request.

        Raises Exception if the login could not be verified.
        """
        urlData = urlparse(loginUrl)

        self.proxies = proxies
        self.loginData = loginData
        self.loginUrl = loginUrl
        self.loginTestUrl = loginTestUrl
        self.maxSessionTime = maxSessionTimeSeconds
        self.sessionFile = urlData.netloc + sessionFileAppendix
        self.userAgent = userAgent
        self.loginTestString = loginTestString
        self.debug = debug

        self.login(forceLogin, **kwargs)

    def modification_date(self, filename):
        """
        return last file modification date as (naive, local time) datetime object

        Raises OSError if the file does not exist or is inaccessible.
        """
        t = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(t)

    def login(self, forceLogin=False, **kwargs):
        """
        login to a session. Try to read last saved session from cache file. If this fails
        (no file, unreadable file, file too old, or the cached session is no longer
        logged in) do a proper login. With 'forceLogin' the cache is skipped entirely.

        The session cache file is updated only after the login has been verified,
        so a failed login is never cached.

        Further keyword arguments are forwarded to the login post request.

        Raises Exception if 'loginTestString' is not found after a proper login.
        """
        if self.debug:
            print('loading or generating session...')

        cachedSession = None if forceLogin else self._loadSessionFromCache()
        if cachedSession is not None:
            self.session = cachedSession
            if self._loginTestPassed():
                # the session was just used successfully: refresh the timeout
                self.saveSessionToCache()
                return
            if self.debug:
                print('cached session is no longer logged in, logging in again')

        self.session = requests.Session()
        self.session.headers.update({'user-agent': self.userAgent})
        self.session.post(self.loginUrl, data=self.loginData,
                          proxies=self.proxies, **kwargs)
        if self.debug:
            print('created new session with login')

        if not self._loginTestPassed():
            raise Exception("could not log into provided site '%s'"
                            " (did not find successful login string)"
                            % self.loginUrl)

        # only a verified session is worth caching
        self.saveSessionToCache()

    def _loadSessionFromCache(self):
        """
        return the cached session, or None if there is no usable cache file
        """
        try:
            lastAccess = self.modification_date(self.sessionFile)
        except OSError:
            # no cache file (or not accessible)
            return None

        # total_seconds() is required here: timedelta.seconds ignores whole
        # days, which would make a file older than a day look fresh again
        age = (datetime.datetime.now() - lastAccess).total_seconds()
        if age >= self.maxSessionTime:
            return None

        try:
            # NOTE: pickle executes arbitrary code while loading; the cache
            # file must only ever be writable by trusted users.
            with open(self.sessionFile, "rb") as f:
                session = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError):
            # unreadable, truncated or corrupt cache: fall back to a proper login
            return None

        if self.debug:
            print("loaded session from cache (last access %ds ago) " % age)
        return session

    def _loginTestPassed(self):
        """
        return True if 'loginTestString' occurs in the page at 'loginTestUrl'
        """
        # use the configured proxies here as well, like every other request
        res = self.session.get(self.loginTestUrl, proxies=self.proxies)
        passed = self.loginTestString.lower() in res.text.lower()
        if not passed and self.debug:
            print(res.text)
        return passed

    def saveSessionToCache(self):
        """
        save session to a cache file
        """
        # always save (to update timeout)
        with open(self.sessionFile, "wb") as f:
            pickle.dump(self.session, f)
        if self.debug:
            print('updated session cache-file %s' % self.sessionFile)

    def retrieveContent(self, url, method="get", postData=None, postDataFiles=None, **kwargs):
        """
        return the response for the url with respect to the session.

        If 'method' is not 'get' (compared case-insensitively), the url will
        be called with 'postData' (and 'postDataFiles' as uploaded files)
        as a post request. Further keyword arguments are forwarded to the
        underlying request.
        """
        # accept 'GET' as well, so it does not silently turn into a post
        if method.lower() == 'get':
            res = self.session.get(url, proxies=self.proxies, **kwargs)
        else:
            res = self.session.post(url, data=postData, proxies=self.proxies,
                                    files=postDataFiles, **kwargs)

        # the session has been updated on the server, so also update in cache
        self.saveSessionToCache()

        return res