Skip to content
Snippets Groups Projects
Commit 11d4e71c authored by Marco Malavolti's avatar Marco Malavolti
Browse files

Added IdP exclusion with robots.txt

parent 226eb373
No related branches found
No related tags found
No related merge requests found
......@@ -52,7 +52,12 @@ There are some situations where the check cannot work reliably. In those cases i
# Disable Checks
In cases where an IdP cannot be reliably checked, it is necessary to create, also empty, `eccs-disabled.txt` file on IdP's web root.
In cases where an IdP cannot be reliably checked, it is necessary to create or enrich the `robots.txt` file on the IdP's web root with:
```bash
User-agent: ECCS
Disallow: /
```
# On-line interface
......
......@@ -6,7 +6,7 @@ import json
import re
import requests
from eccs2properties import DAY, ECCS2HTMLDIR, ECCS2OUTPUTDIR, ECCS2RESULTSLOG, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SPS, ECCS2SELENIUMDEBUG
from eccs2properties import DAY, ECCS2HTMLDIR, ECCS2OUTPUTDIR, ECCS2RESULTSLOG, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SPS, ECCS2SELENIUMDEBUG,ROBOTS_USER_AGENT
from pathlib import Path
from selenium.common.exceptions import TimeoutException
from urllib3.util import parse_url
......@@ -26,6 +26,15 @@ def getIDPfqdn(entityIDidp):
else:
return entityIDidp.split(":")[-1]
# Return True if the ECCS check MUST not be run
def checkRobots(url_robots_txt):
robots_txt = requests.get(url_robots_txt)
p = re.compile('^User-agent:\sECCS\sDisallow:\s\/\s*$', re.MULTILINE)
m = p.search(robots_txt.text)
if (m):
return True
else:
return False
# The function check that the IdP recognized the SP by presenting its Login page.
# If the IdP Login page contains "username" and "password" fields, than the test is passed.
......@@ -51,36 +60,44 @@ def checkIdP(sp,idp,test):
fqdn_sp = parse_url(sp)[2]
wayfless_url = sp + idp['entityID']
exclude_idp = ""
robots = ""
try:
headers = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
exclude_idp = requests.get("https://%s/eccs-disabled.txt" % fqdn_idp, headers=headers, verify=False, timeout=30)
if (exclude_idp == ""):
exclude_idp = requests.get("http://%s/eccs-disabled.txt" % fqdn_idp, headers=headers, verify=False, timeout=30)
headers = {
'User-Agent': '%s' % ROBOTS_USER_AGENT
}
except requests.exceptions.ConnectionError as e:
print("!!! ECCS-DISABLED REQUESTS CONNECTION ERROR EXCEPTION !!!")
#print (e.__str__())
exclude_idp = ""
robots = requests.get("https://%s/robots.txt" % fqdn_idp, headers=headers, verify=True, timeout=30)
except requests.exceptions.Timeout as e:
print("!!! ECCS-DISABLED REQUESTS TIMEOUT EXCEPTION !!!")
#print (e.__str__())
exclude_idp = ""
if (robots == ""):
robots = requests.get("http://%s/robots.txt" % fqdn_idp, headers=headers, verify=True, timeout=30)
if (exclude_idp):
except (requests.exceptions.ConnectionError,requests.exceptions.Timeout,requests.exceptions.SSLError) as e:
check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
if (test is not True):
with open("%s/%s/%s---%s.html" % (ECCS2HTMLDIR,DAY,fqdn_idp,fqdn_sp),"w") as html:
html.write("IdP excluded from check by eccs-disabled.txt")
html.write("IdP excluded from check because the download of 'robots.txt' failed: %s" % e.__str__())
else:
print("IdP excluded from check by eccs-disabled.txt")
print("IdP excluded from check because the download of 'robots.txt' failed: %s" % e.__str__())
return (idp['entityID'],wayfless_url,check_time,"NULL","DISABLED")
if (robots):
check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
p = re.compile('^User-agent:\sECCS\sDisallow:\s\/\s*$', re.MULTILINE)
m = p.search(robots.text)
if (m):
if (test is not True):
with open("%s/%s/%s---%s.html" % (ECCS2HTMLDIR,DAY,fqdn_idp,fqdn_sp),"w") as html:
html.write("IdP excluded from check by robots.txt")
else:
print("IdP excluded from check by robots.txt")
return (idp['entityID'],wayfless_url,check_time,"NULL","DISABLED")
if (idp['registrationAuthority'] in federation_blacklist):
check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
......
......@@ -39,6 +39,9 @@ ECCS2NUMPROCESSES = 25
# The 2 SPs that will be used to test each IdP
ECCS2SPS = ["https://sp24-test.garr.it/Shibboleth.sso/Login?entityID=", "https://attribute-viewer.aai.switch.ch/Shibboleth.sso/Login?entityID="]
# ROBOTS.TXT
ROBOTS_USER_AGENT = "ECCS/2.0 (+https://dev-mm.aai-test.garr.it/eccs2)"
# Registration Authority of Federations to exclude from the check
FEDS_BLACKLIST = [
'http://www.surfconext.nl/',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment