From 3eadc8b9bed23362faef56909ac1dfda7b6c7a5d Mon Sep 17 00:00:00 2001
From: Marco Malavolti <marco.malavolti@gmail.com>
Date: Thu, 11 Jun 2020 12:04:27 +0200
Subject: [PATCH] Refactoring

---
 eccs2.py    | 165 ++++++++++++++++++++++------------------------------
 runEccs2.py | 131 ++++++++++++++++++++++++++---------------
 2 files changed, 152 insertions(+), 144 deletions(-)

diff --git a/eccs2.py b/eccs2.py
index cca7f78..36b7df2 100755
--- a/eccs2.py
+++ b/eccs2.py
@@ -3,10 +3,8 @@
 import argparse
 import json
 import logging
-import time
 import os
 import eccs2properties
-import psutil
 import signal
 import re
 import requests
@@ -21,105 +19,47 @@ from selenium.common.exceptions import TimeoutException
 
 
 """
-  Apre un SP con Discovery Service, seleziona l'IdP di cui fare il test e lo raggiunge iniziando una vera sessione via browser.
-  A noi serve fare un test di accesso e presentazione della pagina di Login su 2 SP dislocati geograficamente in punti diversi.
-  Per questo sono stati scelti SP24(IDEM) e l'Attribute Viewer (SWITCH). Se il test fallisce su entrambi, allora non va bene.
-  Questo script funziona SOLO con SP aventi Embedded Discovery Service come DS.
+  This script uses Selenium and Chromium to select the IdP to check from a Shibboleth SP that has the Shibboleth Embedded Discovery Service installed and configured to answer to all eduGAIN IdPs.
+  The SPs used to check an IdP are SP24 (IDEM) and the Attribute Viewer (SWITCH).
+  The check passes when both SPs return the authentication page of the IdP being checked.
 """
 
-def getIdpListFromUrl():
-   import certifi
-   import urllib3
-   import json
-
-   manager = urllib3.PoolManager(
-               cert_reqs='CERT_REQUIRED',
-               ca_certs=certifi.where()
-             )
-
-   url = "https://technical.edugain.org/api.php?action=list_eccs_idps"
-   json_data = manager.request('GET', url)
-   data = json.loads(json_data.data.decode('utf-8'))
-
-   return data
-
-
-def getIdpListFromFile():
-   import json
-
-   #with open('list_eccs_idps-idem.txt','r',encoding='utf-8') as f:
-   with open('federation_idps.txt','r',encoding='utf-8') as f:
-      json_data = json.loads(f.read())
-      return json_data
-
-
-def checkIdP(sp,idp,logger):
-   # Disable SSL requests warning messages
-   requests.packages.urllib3.disable_warnings()
-
-   # Configure Web-driver
-   chrome_options = webdriver.ChromeOptions()
-   chrome_options.add_argument('--headless')
-   chrome_options.add_argument('--no-sandbox')
-   chrome_options.add_argument('--disable-dev-shm-usage')
-   chrome_options.add_argument('--ignore-certificate-errors')
-
-   driver = webdriver.Chrome('chromedriver', options=chrome_options,  service_args=['--log-path=./selenium_chromedriver.log'])
-   #driver = webdriver.Chrome('chromedriver', chrome_options=chrome_options,  service_args=['--verbose', '--log-path=./selenium_chromedriver.log'])
-   #driver = webdriver.Chrome('chromedriver', chrome_options=chrome_options)
-
-   # Configure timeouts: 30 sec
-   driver.set_page_load_timeout(30)
-   driver.set_script_timeout(30)
+def checkIdP(sp,idp,logger,driver):
 
    # Configure Blacklists
-   federation_blacklist = [
-           'http://www.surfconext.nl/',
-           'https://www.wayf.dk',
-           'http://feide.no/'
-           ]
-
-   entities_blacklist = [ 
-           'https://idp.eie.gr/idp/shibboleth',
-           'https://gn-vho.grnet.gr/idp/shibboleth',
-           'https://wtc.tu-chemnitz.de/shibboleth',
-           'https://wtc.tu-chemnitz.de/shibboleth',
-           'https://idp.fraunhofer.de/idp/shibboleth',
-           'https://login.hs-owl.de/nidp/saml2/metadata',
-           'https://idp.dfn-cert.de/idp/shibboleth'
-           ]
+   federation_blacklist = eccs2properties.FEDS_BLACKLIST
+   entities_blacklist = eccs2properties.IDPS_BLACKLIST 
 
    if (idp['registrationAuthority'] in federation_blacklist):
       logger.info("%s;%s;NULL;Federation excluded from checks" % (idp['entityID'],sp))
-      driver.close()
-      driver.quit()
       return "DISABLED"
 
    if (idp['entityID'] in entities_blacklist):
       logger.info("%s;%s;NULL;IdP excluded from checks" % (idp['entityID'],sp))
-      driver.close()
-      driver.quit()
       return "DISABLED"
 
-
    # Open SP, select the IDP from the EDS and press 'Enter' to reach the IdP login page to check
    try:
       driver.get(sp)
       driver.find_element_by_id("idpSelectInput").send_keys(idp['entityID'] + Keys.ENTER)
 
-      driver.find_element_by_id("username")
-      driver.find_element_by_id("password")
    except TimeoutException as e:
-     driver.delete_all_cookies()
-     print("TIMEOUT - driver.current_url: %s" % (driver.current_url))
-     status_code = requests.get(driver.current_url, verify=False).status_code
-     logger.info("%s;%s;%s;TIMEOUT" % (idp['entityID'],sp,status_code))
-     driver.close()
-     driver.quit()
+     logger.info("%s;%s;999;TIMEOUT" % (idp['entityID'],sp))
      return "TIMEOUT"
    except NoSuchElementException as e:
-     driver.delete_all_cookies()
+     print("!!! NO SUCH ELEMENT EXCEPTION !!!")
+     print(e.__str__())
      pass
+   except WebDriverException as e:
+     if "ConnectionRefusedError" in e.__str__():
+        logger.info("%s;%s;000;ConnectionError" % (idp['entityID'],sp))
+        return "Connection-Error"
+     else:
+        print("!!! UN-HANDLE WEB DRIVER EXCEPTION !!!")
+        raise e
+   except:
+     print("!!! UN-HANDLE OTHER EXCEPTION !!!")
+     raise e
 
    pattern_metadata = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements"
 
@@ -130,29 +70,35 @@ def checkIdP(sp,idp,logger):
    username_found = re.search(pattern_username,driver.page_source, re.I)
    password_found = re.search(pattern_password,driver.page_source, re.I)
 
-   if(metadata_not_found):
-      #print("MD-NOT-FOUND - driver.current_url: %s" % (driver.current_url))
-      status_code = requests.get(driver.current_url, verify=False).status_code
-      logger.info("%s;%s;%s;No-eduGAIN-Metadata" % (idp['entityID'],sp,status_code))
+   try:
+      r = requests.get(driver.current_url, verify=False)
+      status_code = r.status_code
+
+   except requests.exceptions.ConnectionError as e:
       driver.delete_all_cookies()
       driver.close()
       driver.quit()
+
+      logger.info("%s;%s;000;ConnectionError" % (idp['entityID'],sp))
+      return "Connection-Error"
+   except requests.exceptions.RequestException as e:
+      driver.delete_all_cookies()
+      driver.close()
+      driver.quit()
+      print("!!! UN-HANDLE REQUEST EXCEPTION !!!")
+      raise SystemExit(e)
+
+   if(metadata_not_found):
+      #print("MD-NOT-FOUND - driver.current_url: %s" % (driver.current_url))
+      logger.info("%s;%s;%s;No-eduGAIN-Metadata" % (idp['entityID'],sp,status_code))
       return "No-eduGAIN-Metadata"
    elif not username_found or not password_found:
       #print("INVALID-FORM - entityID: %s, sp: %s, driver.current_url: %s" % (idp['entityID'],sp,driver.current_url))
-      status_code = requests.get(driver.current_url, verify=False).status_code
       logger.info("%s;%s;%s;Invalid-Form" % (idp['entityID'],sp,status_code))
-      driver.delete_all_cookies()
-      driver.close()
-      driver.quit()
-      return "Invalid Form"
+      return "Invalid-Form"
    else:
       #print("MD-FOUND - driver.current_url: %s" % (driver.current_url))
-      status_code = requests.get(driver.current_url, verify=False).status_code
       logger.info("%s;%s;%s;OK" % (idp['entityID'],sp,status_code))
-      driver.delete_all_cookies()
-      driver.close()
-      driver.quit()
       return "OK"
 
 
@@ -196,10 +142,10 @@ def getIdPContacts(idp,contactType):
 
    return ctcList
 
-def checkIdp(idp,sps,eccs2log,eccs2checksLog):
+def checkIdp(idp,sps,eccs2log,eccs2checksLog,driver):
       result = []
       for sp in sps:
-         resultCheck = checkIdP(sp,idp,eccs2checksLog)
+         resultCheck = checkIdP(sp,idp,eccs2checksLog,driver)
          result.append(resultCheck)
 
       listTechContacts = getIdPContacts(idp,'technical')
@@ -254,7 +200,6 @@ if __name__=="__main__":
    eccs2checksLog = getLogger("logs/"+eccs2properties.ECCS2CHECKSLOGPATH,"INFO")
 
    sps = ["https://sp24-test.garr.it/secure", "https://attribute-viewer.aai.switch.ch/eds/"]
-   #sps = ["https://attribute-viewer.aai.switch.ch/eds/", "https://attribute-viewer.aai.switch.ch/eds/"]
 
    parser = argparse.ArgumentParser(description='Checks if the input IdP consumed correctly eduGAIN metadata by accessing two different SPs')
    parser.add_argument("idpJson", metavar="idpJson", nargs=1, help="An IdP in Json format")
@@ -263,4 +208,32 @@ if __name__=="__main__":
 
    idp = json.loads(args.idpJson[0])
 
-   checkIdp(idp,sps,eccs2log,eccs2checksLog)
+   # Disable SSL requests warning messages
+   requests.packages.urllib3.disable_warnings()
+
+   # Configure Web-driver
+   chrome_options = webdriver.ChromeOptions()
+   chrome_options.add_argument('--headless')
+   chrome_options.add_argument('--no-sandbox')
+   chrome_options.add_argument('--disable-dev-shm-usage')
+   chrome_options.add_argument('--ignore-certificate-errors')
+   chrome_options.add_argument('--start-maximized')
+   chrome_options.add_argument('--disable-extensions')
+
+   #driver = webdriver.Chrome('chromedriver', options=chrome_options,  service_args=['--log-path=./selenium_chromedriver.log'])
+   #driver = webdriver.Chrome('chromedriver', options=chrome_options,  service_args=['--verbose', '--log-path=./selenium_chromedriver.log'])
+   driver = webdriver.Chrome('chromedriver', options=chrome_options)
+
+   # Configure timeouts: 30 sec
+   driver.set_page_load_timeout(30)
+   driver.set_script_timeout(30)
+
+   checkIdp(idp,sps,eccs2log,eccs2checksLog,driver)
+
+   driver.delete_all_cookies()
+   driver.close()
+   driver.quit()
+
+   # Kill process to release resources and to avoid zombies
+#   pid = os.getpid()
+#   os.kill(pid, signal.SIGTERM)
diff --git a/runEccs2.py b/runEccs2.py
index 664f7db..21a93eb 100755
--- a/runEccs2.py
+++ b/runEccs2.py
@@ -3,46 +3,69 @@
 import asyncio
 import eccs2properties
 import json
+import pathlib
+import requests
 import sys
 import time
 
 from subprocess import Popen,PIPE
 
-def getIdPs():
-   import certifi
-   import urllib3
-   import json
+# Returns a dict of the form "{ nameFed: reg_auth }"
+def getRegAuthDict(list_feds):
+   regAuth_dict = {}
+
+   for key,value in list_feds.items():
+      name = value['name']
+      reg_auth = value['reg_auth']
+
+      regAuth_dict[name] = reg_auth
+
+   return regAuth_dict
 
-   manager = urllib3.PoolManager(
-               cert_reqs='CERT_REQUIRED',
-               ca_certs=certifi.where()
-             )
 
-   url = "https://technical.edugain.org/api.php?action=list_eccs_idps"
-   idp_json = manager.request('GET', url)
+# Returns the list of IdPs whose registrationAuthority matches a single federation
+def getIdpList(list_eccs_idps,reg_auth):
 
-   idp_dict = json.loads(idp_json.data.decode('utf-8'))
+   fed_idp_list = []
+   for idp in list_eccs_idps:
+      if (idp['registrationAuthority'] == reg_auth):
+         fed_idp_list.append(idp)
 
-   idp_list = []
+   return fed_idp_list
 
-   #federation = input("Insert the registrationAuthority: ")
-   federation = "http://www.idem.garr.it/"
 
-   for idp in idp_dict:
-      if (idp['registrationAuthority'] == federation):
-         idp_list.append(idp)
+# Returns a Python Dictionary
+def getListFeds(url, filename):
+   # If the file does not exist, download it and save it as filename
+   path = pathlib.Path(filename)
+   if(path.exists() == False):
+      with open("%s" % (filename), mode="w+", encoding='utf-8') as f:
+         f.write(requests.get(url).text)
 
-   return json.dumps(idp_list)
+   # then open it and work with local file
+   with open("%s" % (filename), mode="r", encoding='utf-8') as f:
+      return json.loads(f.read())
 
 
-def getIdpListFromFile():
-   import json
+# Returns a Python List
+def getListEccsIdps(url, filename):
+   # If the file does not exist, download it and save it as filename
+   path = pathlib.Path(filename)
+   if(path.exists() == False):
+      with open("%s" % (filename), mode="w+", encoding='utf-8') as f:
+         f.write(requests.get(url).text)
 
-   #with open('list_eccs_idps-idem.txt','r',encoding='utf-8') as f:
-   with open('federation_idps.txt','r',encoding='utf-8') as f:
-      json_data = json.loads(f.read())
-      return json_data
+   # then open it and work with local file
+   with open("%s" % (filename), mode="r", encoding='utf-8') as f:
+      return json.loads(f.read())
 
+# Prepare the input files for ECCS2 (one file per federation)
+def genEccs2input(reg_auth_dict):
+   for name,regAuth in reg_auth_dict.items():
+      fed_idp_list = getIdpList(list_eccs_idps,regAuth)
+      filename = "/tmp/data/inputEccs2/%s.txt" % name
+      with open("%s" % (filename), mode="w+", encoding='utf-8') as f:
+         f.write(','.join(str(idp) for idp in fed_idp_list))
 
 async def run(name,queue,stdout_file,stderr_file):
    while True:
@@ -59,9 +82,9 @@ async def run(name,queue,stdout_file,stderr_file):
       stdout, stderr = await proc.communicate()
 
       if stdout:
-         stdout_file.write(f'[stdout]\n{stdout.decode()}')
+         stdout_file.write(f'-----\n[cmd-out]\n{cmd}\n[stdout]\n{stdout.decode()}')
       if stderr:
-         stderr_file.write(f'[stderr]\n{stderr.decode()}\n\n[cmd]\n{cmd}')
+         stderr_file.write(f'-----\n[cmd-err]\n{cmd}\n[stderr]\n{stderr.decode()}')
 
       # Notify the queue that the "work cmd" has been processed.
       queue.task_done()
@@ -77,7 +100,9 @@ async def main(cmd_list,stdout_file,stderr_file):
 
     # Create worker tasks to process the queue concurrently.
     tasks = []
-    for i in range(30):
+    #for i in range(15): # !!!-WORKING-!!!
+    #for i in range(30): # !!!-WORSTE-!!!
+    for i in range(10):
         task = asyncio.create_task(run("cmd-{%d}" % i, queue, stdout_file, stderr_file))
         tasks.append(task)
 
@@ -97,30 +122,40 @@ async def main(cmd_list,stdout_file,stderr_file):
 # MAIN
 if __name__=="__main__":
 
- start = time.time()
+   start = time.time()
+
+   # Setup list_feds
+   url = 'https://technical.edugain.org/api.php?action=list_feds&opt=1'
+   filename = "/tmp/data/list_feds.txt"
+   list_feds = getListFeds(url, filename)
+
+   # Setup list_eccs_idps
+   url = 'https://technical.edugain.org/api.php?action=list_eccs_idps'
+   filename = "/tmp/data/list_eccs_idps.txt"
+   list_eccs_idps = getListEccsIdps(url, filename)
+
+   stdout_file = open(eccs2properties.ECCS2STDOUT,"w+")
+   stderr_file = open(eccs2properties.ECCS2STDERR,"w+")
 
- '''
- data = getIdPs()
+   # Prepare input file for ECCS2
+   regAuthDict = getRegAuthDict(list_feds)
+   #genEccs2input(regAuthDict)
 
- f = open('federation_idps.txt', 'w')
- f.write(data)
- f.close()
- '''
- stdout_file = open(eccs2properties.ECCS2STDOUT,"w+")
- stderr_file = open(eccs2properties.ECCS2STDERR,"w+")
+   for name,regAuth in regAuthDict.items():
+      idpJsonList = getIdpList(list_eccs_idps,regAuth)
 
- idpJsonList = getIdpListFromFile()
- num_idps = len(idpJsonList)
- cmd_list = [["%s/eccs2.py \'%s\'" % (eccs2properties.ECCS2PATH, json.dumps(idp))] for idp in idpJsonList]
+      num_idps = len(idpJsonList)
+      cmd_list = [["%s/eccs2.py \'%s\'" % (eccs2properties.ECCS2PATH, json.dumps(idp))] for idp in idpJsonList]
 
- proc_list = []
- count = 0
- while (count < num_idps):
-       cmd = "".join(cmd_list.pop())
-       proc_list.append(cmd)
-       count = count + 1
+      proc_list = []
+      count = 0
+      while (count < num_idps):
+         cmd = "".join(cmd_list.pop())
+         proc_list.append(cmd)
+         count = count + 1
  
- asyncio.run(main(proc_list,stdout_file,stderr_file))
+      asyncio.run(main(proc_list,stdout_file,stderr_file))
+#      asyncio.run(main(cmd_list,stdout_file,stderr_file))
 
- end = time.time()
- print("Time taken in seconds - ", end - start)
+   end = time.time()
+   print("Time taken in seconds - ", end - start)
-- 
GitLab