118 lignes
4.5 KiB

  1. import os
  2. import pickle
  3. import datetime
  4. from urllib.parse import urlparse
  5. import requests
  6. class MyLoginSession:
  7. def __init__(self,
  8. loginUrl,
  9. loginData,
  10. loginTestUrl,
  11. loginTestString,
  12. sessionFileAppendix='_session.dat',
  13. maxSessionTimeSeconds=30 * 60,
  14. proxies=None,
  15. userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
  16. debug=False,
  17. forceLogin=False,
  18. **kwargs):
  19. """
  20. save some information needed to login the session
  21. you'll have to provide 'loginTestString' which will be looked for in the
  22. responses html to make sure, you've properly been logged in
  23. 'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ...
  24. 'loginData' will be sent as post data (dictionary of id : value).
  25. 'maxSessionTimeSeconds' will be used to determine when to re-login.
  26. """
  27. urlData = urlparse(loginUrl)
  28. self.proxies = proxies
  29. self.loginData = loginData
  30. self.loginUrl = loginUrl
  31. self.loginTestUrl = loginTestUrl
  32. self.maxSessionTime = maxSessionTimeSeconds
  33. self.sessionFile = urlData.netloc + sessionFileAppendix
  34. self.userAgent = userAgent
  35. self.loginTestString = loginTestString
  36. self.debug = debug
  37. self.login(forceLogin, **kwargs)
  38. def modification_date(self, filename):
  39. """
  40. return last file modification date as datetime object
  41. """
  42. t = os.path.getmtime(filename)
  43. return datetime.datetime.fromtimestamp(t)
  44. def login(self, forceLogin=False, **kwargs):
  45. """
  46. login to a session. Try to read last saved session from cache file. If this fails
  47. do proper login. If the last cache access was too old, also perform a proper login.
  48. Always updates session cache file.
  49. """
  50. wasReadFromCache = False
  51. if self.debug:
  52. print('loading or generating session...')
  53. if os.path.exists(self.sessionFile) and not forceLogin:
  54. time = self.modification_date(self.sessionFile)
  55. # only load if file less than 30 minutes old
  56. lastModification = (datetime.datetime.now() - time).seconds
  57. if lastModification < self.maxSessionTime:
  58. with open(self.sessionFile, "rb") as f:
  59. self.session = pickle.load(f)
  60. wasReadFromCache = True
  61. if self.debug:
  62. print("loaded session from cache (last access %ds ago) "
  63. % lastModification)
  64. if not wasReadFromCache:
  65. self.session = requests.Session()
  66. self.session.headers.update({'user-agent': self.userAgent})
  67. res = self.session.post(self.loginUrl, data=self.loginData,
  68. proxies=self.proxies, **kwargs)
  69. if self.debug:
  70. print('created new session with login')
  71. self.saveSessionToCache()
  72. # test login
  73. res = self.session.get(self.loginTestUrl)
  74. if res.text.lower().find(self.loginTestString.lower()) < 0:
  75. if self.debug:
  76. print(res.text)
  77. raise Exception("could not log into provided site '%s'"
  78. " (did not find successful login string)"
  79. % self.loginUrl)
  80. def saveSessionToCache(self):
  81. """
  82. save session to a cache file
  83. """
  84. # always save (to update timeout)
  85. with open(self.sessionFile, "wb") as f:
  86. pickle.dump(self.session, f)
  87. if self.debug:
  88. print('updated session cache-file %s' % self.sessionFile)
  89. def retrieveContent(self, url, method="get", postData=None, postDataFiles=None, **kwargs):
  90. """
  91. return the content of the url with respect to the session.
  92. If 'method' is not 'get', the url will be called with 'postData'
  93. as a post request.
  94. """
  95. if method == 'get':
  96. res = self.session.get(url, proxies=self.proxies, **kwargs)
  97. else:
  98. res = self.session.post(url, data=postData, proxies=self.proxies, files=postDataFiles, **kwargs)
  99. # the session has been updated on the server, so also update in cache
  100. self.saveSessionToCache()
  101. return res