| @@ -1,117 +0,0 @@ | |||
| import os | |||
| import pickle | |||
| import datetime | |||
| from urllib.parse import urlparse | |||
| import requests | |||
class MyLoginSession:
    """
    A `requests` session that logs in once and is cached on disk.

    The session object is pickled to a file named after the login url's
    host (``<netloc><sessionFileAppendix>``, relative to the current working
    directory). While that file is younger than 'maxSessionTimeSeconds' it is
    reused instead of posting the login form again.
    """

    def __init__(self,
                 loginUrl,
                 loginData,
                 loginTestUrl,
                 loginTestString,
                 sessionFileAppendix='_session.dat',
                 maxSessionTimeSeconds=30 * 60,
                 proxies=None,
                 userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
                 debug=False,
                 forceLogin=False,
                 **kwargs):
        """
        save some information needed to login the session and log in right away

        'loginUrl' is the url the login form is posted to; its host part is
        also used to build the name of the session cache file.
        'loginData' will be sent as post data (dictionary of id : value).
        'loginTestUrl' is fetched after login to verify that it worked.
        'loginTestString' will be looked for (case-insensitively) in the html
        of 'loginTestUrl' to make sure you've properly been logged in.
        'sessionFileAppendix' is appended to the host name to form the cache
        file name.
        'maxSessionTimeSeconds' will be used to determine when to re-login.
        'proxies' is of format
        { 'https' : 'https://user:pass@server:port', 'http' : ... }
        'userAgent' is sent as the user-agent header of a newly created session.
        'debug' enables progress messages on stdout.
        'forceLogin' skips the cache and always performs a fresh login.
        Any further keyword arguments are forwarded to the login post request.

        Raises an Exception if the login test fails (see login()).
        """
        urlData = urlparse(loginUrl)

        self.proxies = proxies
        self.loginData = loginData
        self.loginUrl = loginUrl
        self.loginTestUrl = loginTestUrl
        self.maxSessionTime = maxSessionTimeSeconds
        self.sessionFile = urlData.netloc + sessionFileAppendix
        self.userAgent = userAgent
        self.loginTestString = loginTestString
        self.debug = debug

        self.login(forceLogin, **kwargs)

    def modification_date(self, filename):
        """
        return last file modification date as datetime object

        Raises OSError if 'filename' does not exist or cannot be accessed.
        """
        t = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(t)

    def _loadCachedSession(self):
        """
        return the session stored in the cache file, or None if there is no
        cache file, it is 'maxSessionTime' seconds old or older, or it cannot
        be read/unpickled.
        """
        try:
            lastWrite = self.modification_date(self.sessionFile)
        except OSError:
            # no cache file yet (or it is not accessible)
            return None

        # total_seconds() covers the whole age; the '.seconds' attribute is
        # only the sub-day remainder and would make a file that is days old
        # look fresh.
        ageSeconds = (datetime.datetime.now() - lastWrite).total_seconds()
        if ageSeconds >= self.maxSessionTime:
            return None

        try:
            with open(self.sessionFile, "rb") as f:
                # NOTE(security): unpickling executes code embedded in the
                # file. The cache file must only be writable by trusted users.
                cachedSession = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError,
                AttributeError, ImportError, IndexError):
            # empty, truncated or otherwise unusable cache file: the caller
            # falls back to a proper login
            if self.debug:
                print('could not read session cache-file %s' % self.sessionFile)
            return None

        if self.debug:
            print("loaded session from cache (last access %ds ago) "
                  % ageSeconds)
        return cachedSession

    def login(self, forceLogin=False, **kwargs):
        """
        login to a session. Try to read last saved session from cache file. If this fails
        do proper login. If the last cache access was too old, also perform a proper login.
        Always updates session cache file.

        'forceLogin' ignores the cache file and always posts the login data.
        Keyword arguments are forwarded to the login post request only.

        Raises an Exception if 'loginTestString' is not found in the content
        of 'loginTestUrl' afterwards.
        """
        if self.debug:
            print('loading or generating session...')

        cachedSession = None if forceLogin else self._loadCachedSession()

        if cachedSession is not None:
            self.session = cachedSession
        else:
            self.session = requests.Session()
            self.session.headers.update({'user-agent': self.userAgent})
            self.session.post(self.loginUrl, data=self.loginData,
                              proxies=self.proxies, **kwargs)
            if self.debug:
                print('created new session with login')

        self.saveSessionToCache()

        # test login (through the same proxies as every other request)
        res = self.session.get(self.loginTestUrl, proxies=self.proxies)
        if self.loginTestString.lower() not in res.text.lower():
            if self.debug:
                print(res.text)
            raise Exception("could not log into provided site '%s'"
                            " (did not find successful login string)"
                            % self.loginUrl)

    def saveSessionToCache(self):
        """
        save session to a cache file
        """
        # always save (to update timeout): rewriting the file refreshes its
        # modification time, which is what the age check is based on
        with open(self.sessionFile, "wb") as f:
            pickle.dump(self.session, f)
        if self.debug:
            print('updated session cache-file %s' % self.sessionFile)

    def retrieveContent(self, url, method="get", postData=None, postDataFiles=None, **kwargs):
        """
        return the response for the url with respect to the session.

        If 'method' is not 'get', the url will be called with 'postData'
        (and 'postDataFiles' as files) as a post request.
        Further keyword arguments are forwarded to the request.
        """
        if method == 'get':
            res = self.session.get(url, proxies=self.proxies, **kwargs)
        else:
            res = self.session.post(url, data=postData, proxies=self.proxies,
                                    files=postDataFiles, **kwargs)

        # the session has been updated on the server, so also update in cache
        self.saveSessionToCache()

        return res