Przeglądaj źródła

renamed jpspy.py to smpy.py

master
sharky555 4 lat temu
rodzic
commit
45fb9a66d1
1 zmienionych plików z 0 dodań i 117 usunięć
  1. +0
    -117
      jpspy.py

+ 0
- 117
jpspy.py Wyświetl plik

@@ -1,117 +0,0 @@
import os
import pickle
import datetime
from urllib.parse import urlparse
import requests


class MyLoginSession:
    """
    a 'requests' session that logs into a site once and keeps the logged-in
    session pickled in a cache file, so that later runs can reuse it instead
    of logging in again
    """

    def __init__(self,
                 loginUrl,
                 loginData,
                 loginTestUrl,
                 loginTestString,
                 sessionFileAppendix='_session.dat',
                 maxSessionTimeSeconds=30 * 60,
                 proxies=None,
                 userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
                 debug=False,
                 forceLogin=False,
                 **kwargs):
        """
        save the information needed to login and immediately login the session

        you'll have to provide 'loginTestString' which will be looked for
        (case-insensitively) in the html returned by 'loginTestUrl' to make
        sure you've properly been logged in

        'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ...
        'loginData' will be sent as post data (dictionary of id : value).
        'maxSessionTimeSeconds' will be used to determine when to re-login.
        'sessionFileAppendix' is appended to the host part of 'loginUrl' to
        build the name of the cache file.
        'forceLogin' skips the cache and always performs a proper login.
        'kwargs' are handed to the login post request.

        raises Exception if the login could not be verified.
        """
        urlData = urlparse(loginUrl)

        self.proxies = proxies
        self.loginData = loginData
        self.loginUrl = loginUrl
        self.loginTestUrl = loginTestUrl
        self.maxSessionTime = maxSessionTimeSeconds
        # one cache file per host, created relative to the working directory
        self.sessionFile = urlData.netloc + sessionFileAppendix
        self.userAgent = userAgent
        self.loginTestString = loginTestString
        self.debug = debug

        self.login(forceLogin, **kwargs)

    def modification_date(self, filename):
        """
        return last file modification date as datetime object
        """
        t = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(t)

    def _loadSessionFromCache(self):
        """
        try to restore the pickled session from the cache file

        returns the age of the cache file in seconds if the session has been
        restored into 'self.session'. Returns None (leaving 'self.session'
        untouched) if the file is missing, is not younger than
        'maxSessionTime' or cannot be unpickled.
        """
        try:
            lastModified = self.modification_date(self.sessionFile)
        except OSError:
            # no cache file (or not accessible)
            return None

        # total_seconds() is required here: timedelta.seconds only holds the
        # remainder below one day, so a file that is 1 day and 5 minutes old
        # would look like it is 5 minutes old.
        age = (datetime.datetime.now() - lastModified).total_seconds()
        # a modification date in the future (clock change) counts as stale
        if not 0 <= age < self.maxSessionTime:
            return None

        try:
            with open(self.sessionFile, "rb") as f:
                # NOTE: unpickling can execute arbitrary code; the cache file
                # must only ever be written by this class / a trusted user.
                self.session = pickle.load(f)
        except (OSError, pickle.UnpicklingError, EOFError, AttributeError,
                ImportError, IndexError):
            # truncated or corrupt cache: behave as if there was none
            return None
        return age

    def login(self, forceLogin=False, **kwargs):
        """
        login to a session. Try to read last saved session from cache file. If this fails
        do proper login. If the last cache access was too old, also perform a proper login.
        A freshly created session is written to the cache file once the login
        has been verified.

        'kwargs' are handed to the login post request.

        raises Exception if 'loginTestString' is not found in the page
        returned by 'loginTestUrl'.
        """
        if self.debug:
            print('loading or generating session...')

        cacheAge = None if forceLogin else self._loadSessionFromCache()
        wasReadFromCache = cacheAge is not None

        if wasReadFromCache:
            if self.debug:
                print("loaded session from cache (last access %ds ago) "
                      % cacheAge)
        else:
            self.session = requests.Session()
            self.session.headers.update({'user-agent': self.userAgent})
            self.session.post(self.loginUrl, data=self.loginData,
                              proxies=self.proxies, **kwargs)

            if self.debug:
                print('created new session with login')

        # test login (through the same proxies as every other request)
        res = self.session.get(self.loginTestUrl, proxies=self.proxies)
        if self.loginTestString.lower() not in res.text.lower():
            if self.debug:
                print(res.text)
            raise Exception("could not log into provided site '%s'"
                            " (did not find successful login string)"
                            % self.loginUrl)

        # only cache a session that passed the test above; caching a failed
        # login would make every later run reuse it until the file expires
        if not wasReadFromCache:
            self.saveSessionToCache()

    def saveSessionToCache(self):
        """
        save session to a cache file
        """
        # always save (to update timeout)
        with open(self.sessionFile, "wb") as f:
            pickle.dump(self.session, f)
        if self.debug:
            print('updated session cache-file %s' % self.sessionFile)

    def retrieveContent(self, url, method="get", postData=None, postDataFiles=None, **kwargs):
        """
        return the response for the url with respect to the session.

        If 'method' is not 'get', the url will be called with 'postData'
        (and 'postDataFiles' as uploaded files) as a post request.
        'kwargs' are handed to the underlying session request.
        """
        if method == 'get':
            res = self.session.get(url, proxies=self.proxies, **kwargs)
        else:
            res = self.session.post(url, data=postData, proxies=self.proxies,
                                    files=postDataFiles, **kwargs)

        # the session has been updated on the server, so also update in cache
        self.saveSessionToCache()

        return res

||||||
x
 
000:0
Ładowanie…
Anuluj
Zapisz