|
|
@@ -0,0 +1,117 @@ |
|
|
|
import os |
|
|
|
import pickle |
|
|
|
import datetime |
|
|
|
from urllib.parse import urlparse |
|
|
|
import requests |
|
|
|
|
|
|
|
|
|
|
|
class MyLoginSession: |
|
|
|
def __init__(self, |
|
|
|
loginUrl, |
|
|
|
loginData, |
|
|
|
loginTestUrl, |
|
|
|
loginTestString, |
|
|
|
sessionFileAppendix='_session.dat', |
|
|
|
maxSessionTimeSeconds=30 * 60, |
|
|
|
proxies=None, |
|
|
|
userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', |
|
|
|
debug=False, |
|
|
|
forceLogin=False, |
|
|
|
**kwargs): |
|
|
|
""" |
|
|
|
save some information needed to login the session |
|
|
|
|
|
|
|
you'll have to provide 'loginTestString' which will be looked for in the |
|
|
|
responses html to make sure, you've properly been logged in |
|
|
|
|
|
|
|
'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ... |
|
|
|
'loginData' will be sent as post data (dictionary of id : value). |
|
|
|
'maxSessionTimeSeconds' will be used to determine when to re-login. |
|
|
|
""" |
|
|
|
urlData = urlparse(loginUrl) |
|
|
|
|
|
|
|
self.proxies = proxies |
|
|
|
self.loginData = loginData |
|
|
|
self.loginUrl = loginUrl |
|
|
|
self.loginTestUrl = loginTestUrl |
|
|
|
self.maxSessionTime = maxSessionTimeSeconds |
|
|
|
self.sessionFile = urlData.netloc + sessionFileAppendix |
|
|
|
self.userAgent = userAgent |
|
|
|
self.loginTestString = loginTestString |
|
|
|
self.debug = debug |
|
|
|
|
|
|
|
self.login(forceLogin, **kwargs) |
|
|
|
|
|
|
|
def modification_date(self, filename): |
|
|
|
""" |
|
|
|
return last file modification date as datetime object |
|
|
|
""" |
|
|
|
t = os.path.getmtime(filename) |
|
|
|
return datetime.datetime.fromtimestamp(t) |
|
|
|
|
|
|
|
def login(self, forceLogin=False, **kwargs): |
|
|
|
""" |
|
|
|
login to a session. Try to read last saved session from cache file. If this fails |
|
|
|
do proper login. If the last cache access was too old, also perform a proper login. |
|
|
|
Always updates session cache file. |
|
|
|
""" |
|
|
|
wasReadFromCache = False |
|
|
|
if self.debug: |
|
|
|
print('loading or generating session...') |
|
|
|
if os.path.exists(self.sessionFile) and not forceLogin: |
|
|
|
time = self.modification_date(self.sessionFile) |
|
|
|
|
|
|
|
# only load if file less than 30 minutes old |
|
|
|
lastModification = (datetime.datetime.now() - time).seconds |
|
|
|
if lastModification < self.maxSessionTime: |
|
|
|
with open(self.sessionFile, "rb") as f: |
|
|
|
self.session = pickle.load(f) |
|
|
|
wasReadFromCache = True |
|
|
|
if self.debug: |
|
|
|
print("loaded session from cache (last access %ds ago) " |
|
|
|
% lastModification) |
|
|
|
if not wasReadFromCache: |
|
|
|
self.session = requests.Session() |
|
|
|
self.session.headers.update({'user-agent': self.userAgent}) |
|
|
|
res = self.session.post(self.loginUrl, data=self.loginData, |
|
|
|
proxies=self.proxies, **kwargs) |
|
|
|
|
|
|
|
if self.debug: |
|
|
|
print('created new session with login') |
|
|
|
self.saveSessionToCache() |
|
|
|
|
|
|
|
# test login |
|
|
|
res = self.session.get(self.loginTestUrl) |
|
|
|
if res.text.lower().find(self.loginTestString.lower()) < 0: |
|
|
|
if self.debug: |
|
|
|
print(res.text) |
|
|
|
raise Exception("could not log into provided site '%s'" |
|
|
|
" (did not find successful login string)" |
|
|
|
% self.loginUrl) |
|
|
|
|
|
|
|
def saveSessionToCache(self): |
|
|
|
""" |
|
|
|
save session to a cache file |
|
|
|
""" |
|
|
|
# always save (to update timeout) |
|
|
|
with open(self.sessionFile, "wb") as f: |
|
|
|
pickle.dump(self.session, f) |
|
|
|
if self.debug: |
|
|
|
print('updated session cache-file %s' % self.sessionFile) |
|
|
|
|
|
|
|
def retrieveContent(self, url, method="get", postData=None, postDataFiles=None, **kwargs): |
|
|
|
""" |
|
|
|
return the content of the url with respect to the session. |
|
|
|
|
|
|
|
If 'method' is not 'get', the url will be called with 'postData' |
|
|
|
as a post request. |
|
|
|
""" |
|
|
|
if method == 'get': |
|
|
|
res = self.session.get(url, proxies=self.proxies, **kwargs) |
|
|
|
else: |
|
|
|
res = self.session.post(url, data=postData, proxies=self.proxies, files=postDataFiles, **kwargs) |
|
|
|
|
|
|
|
# the session has been updated on the server, so also update in cache |
|
|
|
self.saveSessionToCache() |
|
|
|
|
|
|
|
return res |