From 51483a1b15deb7d960bccdbd4bba76e95ad24e51 Mon Sep 17 00:00:00 2001
From: Marco Malavolti <marco.malavolti@gmail.com>
Date: Fri, 24 Jul 2020 17:12:00 +0200
Subject: [PATCH] Added comments on ECCS2 disabled-list entities

---
 eccs2.py           | 45 +++++++++++++++++------------------
 eccs2properties.py | 58 +++++++++++++++++++++++-----------------------
 2 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/eccs2.py b/eccs2.py
index 8b68e69..6be73b4 100755
--- a/eccs2.py
+++ b/eccs2.py
@@ -7,7 +7,7 @@ import re
 import requests
 import sys
 
-from eccs2properties import DAY, ECCS2HTMLDIR, ECCS2OUTPUTDIR, ECCS2RESULTSLOG, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SPS, ECCS2SELENIUMDEBUG,ROBOTS_USER_AGENT,ECCS2REQUESTSTIMEOUT
+from eccs2properties import DAY, ECCS2HTMLDIR, ECCS2OUTPUTDIR, ECCS2RESULTSLOG, ECCS2SPS, ECCS2SELENIUMDEBUG,ROBOTS_USER_AGENT,ECCS2REQUESTSTIMEOUT, FEDS_DISABLED_DICT, IDPS_DISABLED_DICT
 from pathlib import Path
 from selenium.common.exceptions import TimeoutException
 from urllib3.util import parse_url
@@ -15,9 +15,8 @@ from utils import getLogger, getIdPContacts, getDriver
 
 
 """
-The script works with 2 SPs that using Shibboleth Embedded Discovery Service to allow IdP selection on their login page.
-The script has been written to simulate an user that inserts the IdP's entityID into the EDS search box and press "Enter" to load its Login Page. The Login Page MUST presents the fields "username" and "password" to pass the check on each SP involved into the test.
-If the IdP Login page presente the fields for both selected SP the test is passed, otherwise it is failed.
+The check works with the wayfless URL of two SPs and succeeds if the IdP Login Page appears and contains the fields "username" and "password" for each of them.
+It is possible to disable the check through eccs2properties (FEDS_DISABLED_DICT / IDPS_DISABLED_DICT) or through a "robots.txt" file placed in the root web dir of the SAMLRequest endpoint.
 """
 
 # Returns the FQDN to use on the HTML page_source files
@@ -30,7 +29,8 @@ def getIDPlabel(url_or_urn):
 def getIDPfqdn(samlrequest_url):
     return getIDPlabel(samlrequest_url)
 
-# The function check that the IdP recognized the SP by presenting its Login page.
+# This function checks whether an IdP recognizes the SP by presenting its Login page with "username" and "password" fields.
+# It is possible to disable the check through eccs2properties (FEDS_DISABLED_DICT / IDPS_DISABLED_DICT) or through a "robots.txt" file placed in the root web dir of the SAMLRequest endpoint.
 # If the IdP Login page contains "username" and "password" fields, than the test is passed.
 def checkIdP(sp,idp,test):
 
@@ -47,33 +47,35 @@ def checkIdP(sp,idp,test):
       return None
 
    # Configure Blacklists
-   federation_blacklist = FEDS_BLACKLIST
-   entities_blacklist = IDPS_BLACKLIST 
+   #federations_disabled_list = FEDS_DISABLED_LIST
+   #idps_disabled_list = IDPS_DISABLED_LIST
+   federations_disabled_dict = FEDS_DISABLED_DICT
+   idps_disabled_dict = IDPS_DISABLED_DICT
 
    fqdn_sp = parse_url(sp)[2]
    wayfless_url = sp + idp['entityID']
 
    robots = ""
 
-   if (idp['registrationAuthority'] in federation_blacklist):
+   if (idp['registrationAuthority'] in federations_disabled_dict.keys()):
       check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
 
       if (test is not True):
          with open("%s/%s/%s---%s.html" % (ECCS2HTMLDIR,DAY,label_idp,fqdn_sp),"w") as html:
-              html.write("Federation excluded from check")
+              html.write("%s" % federations_disabled_dict[idp['registrationAuthority']])
       else:
-         print("Federation excluded from check")
+         print("%s" % federations_disabled_dict[idp['registrationAuthority']])
 
       return (idp['entityID'],wayfless_url,check_time,"NULL","DISABLED")
 
-   if (idp['entityID'] in entities_blacklist):
+   if (idp['entityID'] in idps_disabled_dict.keys()):
       check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
 
       if (test is not True):
          with open("%s/%s/%s---%s.html" % (ECCS2HTMLDIR,DAY,label_idp,fqdn_sp),"w") as html:
-              html.write("Identity Provider excluded from check")
+              html.write("%s" % idps_disabled_dict[idp['entityID']])
       else:
-         print("Identity Provider excluded from check")
+         print("%s" % idps_disabled_dict[idp['entityID']])
 
       return (idp['entityID'],wayfless_url,check_time,"NULL","DISABLED")
 
@@ -133,8 +135,7 @@ def checkIdP(sp,idp,test):
 
       return (idp['entityID'],wayfless_url,check_time,"(failed)","SSL-Error")
 
-   # Pass every other exceptions on /robots.txt file. We consider only SSLError.
-   #except (requests.exceptions.ConnectionError,requests.exceptions.TooManyRedirects,requests.exceptions.Timeout,requests.exceptions.RetryError) as e:
+   # Ignore every other exception on the /robots.txt file: only SSLError is considered.
    except Exception as e:
       #print("IdP '%s' HAD HAD A REQUEST ERROR: %s" % (fqdn_idp,e.__str__()))
       robots = ""
@@ -169,33 +170,33 @@ def checkIdP(sp,idp,test):
       status_code = str(requests.get(samlrequest_url, headers=headers, verify=False, timeout=ECCS2REQUESTSTIMEOUT).status_code)
 
    except requests.exceptions.ConnectionError as e:
+     print ("status-code: (failed) - ConnectionError for IdP '%s' with SP '%s'" % (idp['entityID'],sp))
      #print("!!! REQUESTS STATUS CODE CONNECTION ERROR EXCEPTION !!!")
      #print (e.__str__())
-     #print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      status_code = "(failed)"
 
    except requests.exceptions.Timeout as e:
+     print ("status-code: 111 - TimeoutError for IdP '%s' with SP '%s'" % (idp['entityID'],sp))
      #print("!!! REQUESTS STATUS CODE TIMEOUT EXCEPTION !!!")
      #print (e.__str__())
-     #print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      status_code = "111"
 
    except requests.exceptions.TooManyRedirects as e:
+     print ("status-code: 222 - TooManyRedirectsError for IdP '%s' with SP '%s'" % (idp['entityID'],sp))
      #print("!!! REQUESTS TOO MANY REDIRECTS EXCEPTION !!!")
      #print (e.__str__())
-     #print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      status_code = "222"
 
    except requests.exceptions.RequestException as e:
-     print ("!!! REQUESTS EXCEPTION !!!")
+     print ("status-code: 333 - RequestException for IdP '%s' with SP '%s'" % (idp['entityID'],sp))
+     #print ("!!! REQUESTS EXCEPTION !!!")
      print (e.__str__())
-     print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      status_code = "333"
 
    except Exception as e:
-     print ("!!! EXCEPTION REQUESTS !!!")
+     print ("status-code: 555 - OtherException for IdP '%s' with SP '%s'" % (idp['entityID'],sp))
+     #print ("!!! EXCEPTION REQUESTS !!!")
      print (e.__str__())
-     print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      status_code = "555"
 
    if(metadata_not_found):
diff --git a/eccs2properties.py b/eccs2properties.py
index c8ce2c8..6e72059 100644
--- a/eccs2properties.py
+++ b/eccs2properties.py
@@ -43,33 +43,33 @@ ECCS2SPS = ["https://sp24-test.garr.it/Shibboleth.sso/Login?entityID=", "https:/
 # ROBOTS.TXT
 ROBOTS_USER_AGENT = "ECCS/2.0 (+https://dev-mm.aai-test.garr.it/eccs2)"
 
-# Registration Authority of Federations to exclude from the check
-FEDS_BLACKLIST = [
-   'http://www.surfconext.nl/',
-   'https://www.wayf.dk',
-   'http://feide.no/'
-]
+# Federations excluded from the check: { 'registrationAuthority': 'reason reported in place of the check result' }
+FEDS_DISABLED_DICT = {
+   'http://www.surfconext.nl/':'Federation excluded from check',
+   'https://www.wayf.dk':'Federation excluded from check',
+   'http://feide.no/':'Federation excluded from check'
+}
 
-# EntityID of IDPs to exclude from the check
-IDPS_BLACKLIST = [
-   'https://idp.eie.gr/idp/shibboleth',
-   'https://edugain-proxy.igtf.net/simplesaml/saml2/idp/metadata.php',
-   'https://gn-vho.grnet.gr/idp/shibboleth',
-   'https://wtc.tu-chemnitz.de/shibboleth',
-   'https://idp.utorauth.utoronto.ca/shibboleth',
-   'https://login.lstonline.ac.uk/idp/pingfederate',
-   'https://idp.cambria.ac.uk/openathens',
-   'https://indiid.net/idp/shibboleth',
-   'https://idp.nulc.ac.uk/openathens',
-   'https://lc-idp.lincolncollege.ac.uk/shibboleth',
-   'https://boleth.chi.ac.uk/idp/shibboleth',
-   'https://idp.wnsc.ac.uk/idp/shibboleth',
-   'https://idp.strodes.ac.uk/shibboleth',
-   'https://idp.ucreative.ac.uk/shibboleth',
-   'https://idp.llandrillo.ac.uk/shibboleth',
-   'https://idp.uel.ac.uk/shibboleth',
-   'https://idp-dev.cardiff.ac.uk/idp/shibboleth',
-   'https://sso.vu.lt/SSO/saml2/idp/metadata.php',
-   #'https://ssl.education.lu/saml/saml2/idp/metadata.php',
-   'https://iif.iucc.ac.il/idp/saml2/idp/metadata.php'
-]
+# IdPs excluded from the check: { 'IdP entityID': 'reason reported in place of the check result' }
+IDPS_DISABLED_DICT = {
+   'https://idp.eie.gr/idp/shibboleth':'Disabled on 2019-04-24 because ECCS cannot check non-standard login page',
+#   'https://edugain-proxy.igtf.net/simplesaml/saml2/idp/metadata.php':'Disabled on 2017-03-17 on request of federation operator',
+   'https://gn-vho.grnet.gr/idp/shibboleth':'Disabled on 2019-04-24 because basic authentication is not supported by ECCS check',
+   'https://wtc.tu-chemnitz.de/shibboleth':'Disabled on 2019-02-26 because ECCS cannot check non-standard login page',
+#   'https://idp.utorauth.utoronto.ca/shibboleth':'Disabled on 2015-08-17 because login on this IdP requires JavaScript, which is not supported by the check',
+   'https://login.lstonline.ac.uk/idp/pingfederate':'Disabled on 2017-02-08 on request of federation operator',
+   'https://idp.cambria.ac.uk/openathens':'Disabled on 2017-10-27 on request of federation operator',
+   'https://indiid.net/idp/shibboleth':'Disabled on 2017-10-27 on request of federation operator',
+   'https://idp.nulc.ac.uk/openathens':'Disabled on 2017-10-27 on request of federation operator',
+#   'https://lc-idp.lincolncollege.ac.uk/shibboleth':'Disabled on 2015-08-17 because uses HTTP Basic authentication, which cannot be checked reliably',
+#   'https://boleth.chi.ac.uk/idp/shibboleth':'Disabled on 2015-08-17 because uses HTTP Basic authentication, which cannot be checked reliably',
+   'https://idp.wnsc.ac.uk/idp/shibboleth':'Disabled on 2017-10-27 on request of federation operator',
+#   'https://idp.strodes.ac.uk/shibboleth':'Disabled on 2015-08-17 because uses HTTP Basic authentication, which cannot be checked reliably',
+   'https://idp.ucreative.ac.uk/shibboleth':'Disabled on 2017-10-27 on request of federation operator',
+   'https://idp.llandrillo.ac.uk/shibboleth':'Disabled on 2017-10-27 on request of federation operator',
+   'https://idp.uel.ac.uk/shibboleth':'Disabled on 2017-10-27 on request of federation operator',
+   'https://idp-dev.cardiff.ac.uk/idp/shibboleth':'Disabled on 2017-02-08 on request of federation operator',
+   'https://sso.vu.lt/SSO/saml2/idp/metadata.php':'Disabled on 2018-11-02 because ECCS cannot check non-standard login page',
+   #'https://ssl.education.lu/saml/saml2/idp/metadata.php':'Disabled on 2018-11-06 ECCS cannot check non-standard login page',
+   'https://iif.iucc.ac.il/idp/saml2/idp/metadata.php':'Disabled on 2018-11-06 ECCS cannot check non-standard login page'
+}
-- 
GitLab