Skip to content
Snippets Groups Projects
Commit 60ebc159 authored by Marco Malavolti
Browse files

Refactor

parent 846b364e
Branches
Tags
No related merge requests found
...@@ -9,11 +9,12 @@ import re ...@@ -9,11 +9,12 @@ import re
import requests import requests
from datetime import date from datetime import date
from eccs2properties import ECCS2LOGSPATH, ECCS2RESULTSLOG, ECCS2CHECKSLOG, ECCS2SELENIUMLOG, FEDS_BLACKLIST, IDPS_BLACKLIST from eccs2properties import ECCS2LOGSPATH, ECCS2RESULTSLOG, ECCS2CHECKSLOG, ECCS2SELENIUMLOG, ECCS2SELENIUMLOGLEVEL, FEDS_BLACKLIST, IDPS_BLACKLIST
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.remote.remote_connection import LOGGER
from selenium.common.exceptions import NoSuchElementException from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException from selenium.common.exceptions import WebDriverException
...@@ -44,70 +45,52 @@ def checkIdP(sp,idp,logger,driver): ...@@ -44,70 +45,52 @@ def checkIdP(sp,idp,logger,driver):
try: try:
driver.get(sp) driver.get(sp)
driver.find_element_by_id("idpSelectInput").send_keys(idp['entityID'] + Keys.ENTER) driver.find_element_by_id("idpSelectInput").send_keys(idp['entityID'] + Keys.ENTER)
page_source = driver.page_source
status_code = requests.get(driver.current_url, verify=False).status_code
except TimeoutException as e: except TimeoutException as e:
logger.info("%s;%s;999;TIMEOUT" % (idp['entityID'],sp)) logger.info("%s;%s;999;TIMEOUT" % (idp['entityID'],sp))
return "TIMEOUT" return "TIMEOUT"
except WebDriverException as e: except NoSuchElementException as e:
print("!!! WEB DRIVER EXCEPTION !!!") logger.info("%s;%s;888;NoSuchElement" % (idp['entityID'],sp))
raise e print("!!! NO SUCH ELEMENT EXCEPTION !!!")
except Exception as e:
print ("!!! EXCEPTION !!!")
raise e raise e
except WebDriverException as e:
logger.info("%s;%s;777;ConnectionError" % (idp['entityID'],sp))
print("!!! WEB DRIVER EXCEPTION !!!")
print(e.__str__())
return "ERROR"
""" except requests.exceptions.ConnectionError as e:
except MaxRetryError as e: logger.info("%s;%s;000;ConnectionError" % (idp['entityID'],sp))
logger.info("%s;%s;111;MaxRetryError" % (idp['entityID'],sp)) return "ERROR"
return "MaxRetryError"
except ConnectionRefusedError as e: except requests.exceptions.TooManyRedirects as e:
logger.info("%s;%s;222;ConnectionRefusedError" % (idp['entityID'],sp)) logger.info("%s;%s;111;TooManyRedirects" % (idp['entityID'],sp))
return "ConnectionRefusedError" return "ERROR"
except ConnectionError as e: except requests.exceptions.RequestException as e:
logger.info("%s;%s;333;ConnectionError" % (idp['entityID'],sp)) logger.info("%s;%s;222;ConnectionError" % (idp['entityID'],sp))
return "ConnectionError" print ("!!! REQUESTS EXCEPTION !!!")
print(e.__str__())
return "ERROR"
except NoSuchElementException as e: except Exception as e:
print("!!! NO SUCH ELEMENT EXCEPTION !!!") logger.info("%s;%s;555;ConnectionError" % (idp['entityID'],sp))
print ("!!! EXCEPTION !!!")
print(e.__str__()) print(e.__str__())
pass return "ERROR"
"""
"""
if "ConnectionRefusedError" in e.__str__():
logger.info("%s;%s;000;ConnectionError" % (idp['entityID'],sp))
return "ConnectionRefusedError"
elif "Connection Refused" in e.__str__():
logger.info("%s;%s;000;ConnectionRefused" % (idp['entityID'],sp))
return "Connection-Refused"
else:
print("!!! UN-HANDLE WEB DRIVER EXCEPTION !!!")
raise e
"""
pattern_metadata = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements" pattern_metadata = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements"
pattern_username = '<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>'; pattern_username = '<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>';
pattern_password = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>'; pattern_password = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>';
metadata_not_found = re.search(pattern_metadata,driver.page_source, re.I) metadata_not_found = re.search(pattern_metadata,page_source, re.I)
username_found = re.search(pattern_username,driver.page_source, re.I) username_found = re.search(pattern_username,page_source, re.I)
password_found = re.search(pattern_password,driver.page_source, re.I) password_found = re.search(pattern_password,page_source, re.I)
try:
r = requests.get(driver.current_url, verify=False)
status_code = r.status_code
except requests.exceptions.ConnectionError as e:
logger.info("%s;%s;000;ConnectionError" % (idp['entityID'],sp))
return "Connection-Error"
except requests.exceptions.RequestException as e:
print("!!! UN-HANDLE REQUEST EXCEPTION !!!")
raise SystemExit(e)
if(metadata_not_found): if(metadata_not_found):
#print("MD-NOT-FOUND - driver.current_url: %s" % (driver.current_url)) #print("MD-NOT-FOUND - driver.current_url: %s" % (driver.current_url))
...@@ -159,10 +142,13 @@ def getIdPContacts(idp,contactType): ...@@ -159,10 +142,13 @@ def getIdPContacts(idp,contactType):
for ctcType in idp['contacts']: for ctcType in idp['contacts']:
if (ctcType == contactType): if (ctcType == contactType):
for ctc in idp['contacts'][contactType]: for ctc in idp['contacts'][contactType]:
if (ctc['emailOrPhone'].get('EmailAddress')): if (ctc.get('emailOrPhone')):
ctcList.append(ctc['emailOrPhone']['EmailAddress'][0]) if (ctc['emailOrPhone'].get('EmailAddress')):
ctcList.append(ctc['emailOrPhone']['EmailAddress'][0])
else:
ctcList.append('missing email')
else: else:
ctcList.append('missing') ctcList.append('missing email')
return ctcList return ctcList
...@@ -244,9 +230,11 @@ if __name__=="__main__": ...@@ -244,9 +230,11 @@ if __name__=="__main__":
chrome_options.add_argument('--start-maximized') chrome_options.add_argument('--start-maximized')
chrome_options.add_argument('--disable-extensions') chrome_options.add_argument('--disable-extensions')
driver = webdriver.Chrome('chromedriver', options=chrome_options, service_args=['--log-path=%s' % ECCS2SELENIUMLOG]) LOGGER.setLevel(ECCS2SELENIUMLOGLEVEL)
#driver = webdriver.Chrome('chromedriver', options=chrome_options, service_args=['--verbose', '--log-path=./selenium_chromedriver.log'])
#driver = webdriver.Chrome('chromedriver', options=chrome_options) driver = webdriver.Chrome('chromedriver', options=chrome_options, service_args=['--log-level=%d' % ECCS2SELENIUMLOGLEVEL, '--log-path=%s' % ECCS2SELENIUMLOG])
# Utility for DEBUG
#driver = webdriver.Chrome('chromedriver', options=chrome_options, service_args=['--verbose', '--log-path=%s' % ECCS2SELENIUMLOG])
# Configure timeouts: 30 sec # Configure timeouts: 30 sec
driver.set_page_load_timeout(30) driver.set_page_load_timeout(30)
...@@ -254,10 +242,10 @@ if __name__=="__main__": ...@@ -254,10 +242,10 @@ if __name__=="__main__":
checkIdp(idp,sps,eccs2log,eccs2checksLog,driver) checkIdp(idp,sps,eccs2log,eccs2checksLog,driver)
driver.delete_all_cookies() #driver.delete_all_cookies()
driver.close() driver.close()
driver.quit() driver.quit()
# Kill process to release resources and to avoid zombies # Kill process to release resources and to avoid zombies - this raises an issue
# pid = os.getpid() #pid = os.getpid()
# os.kill(pid, signal.SIGTERM) #os.kill(pid, signal.SIGTERM)
#!/usr/bin/env python3.8 #!/usr/bin/env python3.8
import asyncio import asyncio
import datetime
import eccs2properties import eccs2properties
import json import json
import pathlib import pathlib
...@@ -8,7 +9,7 @@ import requests ...@@ -8,7 +9,7 @@ import requests
import sys import sys
import time import time
from eccs2properties import ECCS2STDOUT, ECCS2STDERR, ECCS2PATH, ECCS2NUMPROCESSES from eccs2properties import ECCS2STDOUT, ECCS2STDERR, ECCS2PATH, ECCS2NUMPROCESSES, ECCS2LISTIDPSURL, ECCS2LISTIDPSFILE, ECCS2LISTFEDSURL, ECCS2LISTFEDSFILE
from subprocess import Popen,PIPE from subprocess import Popen,PIPE
# Returns a Dict of the form "{ nameFed:reg_auth }" # Returns a Dict of the form "{ nameFed:reg_auth }"
...@@ -36,28 +37,28 @@ def getIdpList(list_eccs_idps,reg_auth): ...@@ -36,28 +37,28 @@ def getIdpList(list_eccs_idps,reg_auth):
# Returns a Python Dictionary # Returns a Python Dictionary
def getListFeds(url, filename): def getListFeds(url, dest_file):
# If file does not exist... download it into the filename # If file does not exist... download it into the dest_file
path = pathlib.Path(filename) path = pathlib.Path(dest_file)
if(path.exists() == False): if(path.exists() == False):
with open("%s" % (filename), mode="w+", encoding='utf-8') as f: with open("%s" % (dest_file), mode="w+", encoding='utf-8') as f:
f.write(requests.get(url).text) f.write(requests.get(url).text)
# then open it and work with local file # then open it and work with local file
with open("%s" % (filename), mode="r", encoding='utf-8') as f: with open("%s" % (dest_file), mode="r", encoding='utf-8') as f:
return json.loads(f.read().replace("'", "&apos;")) return json.loads(f.read().replace("'", "&apos;"))
# Returns a Python List # Returns a Python List
def getListEccsIdps(url, filename): def getListEccsIdps(url, dest_file):
# If file does not exist... download it into the filename # If file does not exist... download it into the dest_file
path = pathlib.Path(filename) path = pathlib.Path(dest_file)
if(path.exists() == False): if(path.exists() == False):
with open("%s" % (filename), mode="w+", encoding='utf-8') as f: with open("%s" % (dest_file), mode="w+", encoding='utf-8') as f:
f.write(requests.get(url).text) f.write(requests.get(url).text)
# then open it and work with local file # then open it and work with local file
with open("%s" % (filename), mode="r", encoding='utf-8') as f: with open("%s" % (dest_file), mode="r", encoding='utf-8') as f:
return json.loads(f.read().replace("'", "&apos;")) return json.loads(f.read().replace("'", "&apos;"))
# Run Command # Run Command
...@@ -94,8 +95,7 @@ async def main(cmd_list,stdout_file,stderr_file): ...@@ -94,8 +95,7 @@ async def main(cmd_list,stdout_file,stderr_file):
# Create worker tasks to process the queue concurrently. # Create worker tasks to process the queue concurrently.
tasks = [] tasks = []
#for i in range(15): # !!!-WORKING-!!!
#for i in range(30): # !!!-WORSTE-!!!
for i in range(ECCS2NUMPROCESSES): for i in range(ECCS2NUMPROCESSES):
task = asyncio.create_task(run("cmd-{%d}" % i, queue, stdout_file, stderr_file)) task = asyncio.create_task(run("cmd-{%d}" % i, queue, stdout_file, stderr_file))
tasks.append(task) tasks.append(task)
...@@ -119,14 +119,14 @@ if __name__=="__main__": ...@@ -119,14 +119,14 @@ if __name__=="__main__":
start = time.time() start = time.time()
# Setup list_feds # Setup list_feds
url = 'https://technical.edugain.org/api.php?action=list_feds&opt=1' url = ECCSLISTFEDSURL
filename = "/tmp/data/list_feds.txt" dest_file = ECCS2LISTFEDSFILE
list_feds = getListFeds(url, filename) list_feds = getListFeds(url, dest_file)
# Setup list_eccs_idps # Setup list_eccs_idps
url = 'https://technical.edugain.org/api.php?action=list_eccs_idps' url = ECCS2LISTIDPSURL
filename = "/tmp/data/list_eccs_idps.txt" dest_file = ECCS2LISTIDPSFILE
list_eccs_idps = getListEccsIdps(url, filename) list_eccs_idps = getListEccsIdps(url, dest_file)
stdout_file = open(ECCS2STDOUT,"w+") stdout_file = open(ECCS2STDOUT,"w+")
stderr_file = open(ECCS2STDERR,"w+") stderr_file = open(ECCS2STDERR,"w+")
...@@ -148,7 +148,6 @@ if __name__=="__main__": ...@@ -148,7 +148,6 @@ if __name__=="__main__":
count = count + 1 count = count + 1
asyncio.run(main(proc_list,stdout_file,stderr_file)) asyncio.run(main(proc_list,stdout_file,stderr_file))
# asyncio.run(main(cmd_list,stdout_file,stderr_file))
end = time.time() end = time.time()
print("Time taken in seconds - ", end - start) print("Time taken in hh:mm:ss - %s", str(datetime.timedelta(seconds=end - start)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment