diff --git a/eccs_properties.py.template b/eccs_properties.py.template index aa9daf2a3a8f409625ca4203245aca200495eed2..574b0c9b286bf74dd03110e519e0f51c98ecbfeb 100644 --- a/eccs_properties.py.template +++ b/eccs_properties.py.template @@ -106,9 +106,7 @@ JAVASCRIPT = '"x-my-okta-version"' IDPERROR = "error\s(has\s)?occur(r)?(ed)$|Error\swhen\sprocessing\s(the\s)?authentication\srequest|The\s(server|system)\sencountered\san\s(internal\s)?error|Internal\sServer\sError|403\sForbidden|Service\sUnavailable|InvalidProfileConfiguration|Unexpected\sSystem\sError|404\s(.\s)?not\sfound|OpenAthens:\s404|On\stapahtunut\svirhe|Unhandled\sexception|Bad\sGateway|Page\sNot\sFound|Δεν\sεπιτρέπεται\sη\sπρόσβαση|tempora(ry|rily)\s(unavailable|error)+|License\serror|n'est\spas\sgérée|Invalid\sRequest|Erreur\s!|Please\sreport\sthis\serror\sto|该网站无法访问|proxy\serror|There\sis\sa\sproblem\swith\syour\saccount" METADATAPATTERN = "Unable\sto\slocate(\sissuer\sin|)\smetadata(\sfor|)|no\smetadata\sfound|profile\sis\snot\sconfigured\sfor\srelying\sparty|Cannot\slocate\sentity|fail\sto\sload\sunknown\sprovider|does\snot\srecognise\sthe\sservice|unable\sto\sload\sprovider|Nous\sn'avons\spas\spu\s(charg|charger)\sle\sfournisseur\sde\sservice|Metadata\snot\sfound|application\s(you\shave\saccessed\s)?is\snot\sregistered\s(for\suse\sthis\sservice)?|Message\sdid\snot\smeet\ssecurity\srequirements|unsupported\s[Rr]equest|METADATANOTFOUND|Unknown\slogin\srequester|is\sunspecified\sor\sunsupported|Unknown\sservice\sprovider|Richiesta\snon\ssupportata|Metadati\snon\strovati|untrusted\sprovider|Unregistered\sService|UNHANDLEDEXCEPTION|Metadata.*.expired|Could\snot\sfind\sany.*.metadata.*.for|不支持的请求|l'application\sn'est\spas\senregistrée|Requisição\snão\ssuportada|トされていないリクエスト|is\snot\sallowed|Authorization\sFailure|Pedido\snão\ssuportado|Nicht\sunterstützte\sAnfrage|Service\sNot\sAuthorized\sfor\sSingle\sSign-On|Your\sbrowser\ssent\sa\srequest\sthat\sthis\sserver\scould\snot\sunderstand|Application\sNot\sAu
thorized\sTo\sUse\sCAS" XPATH_CHECK_PATTERN = '//input[@type="password"]|//input[@type="Password"]|//input[@type="email"]|//input[@type="user"]|//input[@name="name"]|//form[@action="/idp/module.php/multiauth/selectsource.php"]|//input[@type="text"]' -PASSWORDPATTERN = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>' -#USERNAMEPATTERN = '<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>' -#REFUSEDPATTERN = '(^http)(.*\.png$)|(.*\.css$)|(.*\.js$)|(.*\.gif$)|(.*\.svg$)|(.*\.jpg$)' +PASSWORDPATTERN = '<input[\s]+[^>]*((type|name)=\s*"password|email|user|text|name"|password|email|user|text|name)[^>]*>|<form[\s]+[^>]*(action)=\s*"/idp/module.php/multiauth/selectsource.php"[^>]*>' # { 'reg_auth':'reason' } FEDS_DISABLED_DICT = { diff --git a/utils.py b/utils.py index aadb29cc2584055aaaf5ffa3797427792394aded..1d40de9805adc64be84f267dba0d8aba49252f60 100644 --- a/utils.py +++ b/utils.py @@ -2,6 +2,7 @@ import base64 import datetime +import html import json import logging import pathlib @@ -169,7 +170,7 @@ def get_idp_contacts(idp,contactType): ctcList.append('missing email') return ctcList -def store_page_source(page_source,idp,sp,test): +def store_page_source(idp,sp,test,page_source="",header="",footer=""): """Writes the login page source into a specific file :param page_souce: content to write into a file @@ -181,13 +182,13 @@ def store_page_source(page_source,idp,sp,test): """ if (test): - sys.stdout.write(f"{page_source}") + sys.stdout.write(f"{header}\n{html.escape(page_source)}\n{footer}") return True else: # Put the page_source into an appropriate HTML file - with open(f"{e_p.ECCS_HTMLDIR}/{e_p.DAY}/{sha1(idp['entityID'])}---{get_label(sp['entityID'])}.html","w") as html: + with open(f"{e_p.ECCS_HTMLDIR}/{e_p.DAY}/{sha1(idp['entityID'])}---{get_label(sp['entityID'])}.html","w") as h: try: - html.write(page_source) + h.write(f"{header}{html.escape(page_source)}{footer}") return True except IOError: return False @@ 
-308,14 +309,14 @@ def check_idp_response_selenium(sp,idp,test): # Handle Disabled Idps/Federations if (idp['registrationAuthority'] in federations_disabled_dict.keys()): check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z' - page_source = federations_disabled_dict[idp['registrationAuthority']] - store_page_source(page_source,idp,sp,test) + pgsrc = federations_disabled_dict[idp['registrationAuthority']] + store_page_source(idp,sp,test,pgsrc) return (idp['entityID'],sp['entityID'],check_time,"DISABLED",webdriver_error) if (idp['entityID'] in idps_disabled_dict.keys()): check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z' - page_source = idps_disabled_dict[idp['entityID']] - store_page_source(page_source,idp,sp,test) + pgsrc = idps_disabled_dict[idp['entityID']] + store_page_source(idp,sp,test,pgsrc) return (idp['entityID'],sp['entityID'],check_time,"DISABLED",webdriver_error) # Robots + SSL Check @@ -332,9 +333,12 @@ def check_idp_response_selenium(sp,idp,test): # Catch SSL Exceptions and block the ECCS check except requests.exceptions.SSLError as e: if ('unable to get local issuer certificate' not in str(e)): - if (test): page_source = f"\nAn SSL Error occurred while opening https://{fqdn_idp}/robots.txt:\n\n{e}\n\nCheck it on SSL Labs: https://www.ssllabs.com/ssltest/analyze.html?d={fqdn_idp}" - else: page_source = f"<h1>SSL ERROR</h1><h2>An SSL error occurred for the server {fqdn_idp}:</h2><p>{e}</p><p>Check it on SSL Labs: <a href='https://www.ssllabs.com/ssltest/analyze.html?d={fqdn_idp}'>Click Here</a></p>" - store_page_source(page_source,idp,sp,test) + if (test): + header = f"\nAn SSL Error occurred while opening https://{fqdn_idp}/robots.txt:\n\n{e}\n\nCheck it on SSL Labs: https://www.ssllabs.com/ssltest/analyze.html?d={fqdn_idp}" + else: + header = f"<h1>SSL ERROR</h1><h2>An SSL error occurred for the server {fqdn_idp}:</h2><p>{e}</p><p>Check it on SSL Labs: <a 
href='https://www.ssllabs.com/ssltest/analyze.html?d={fqdn_idp}'>Click Here</a></p>" + pgsrc = "" + store_page_source(idp,sp,test,pgsrc,header) return (idp['entityID'],sp['entityID'],check_time,"SSL-Error",webdriver_error) else: pass @@ -349,8 +353,9 @@ def check_idp_response_selenium(sp,idp,test): m = p.search(robots.text) if (m): - page_source = "<h1>IdP excluded from check by robots.txt</h1>" - store_page_source(page_source,idp,sp,test) + header = "<h1>IdP excluded from check by robots.txt</h1>" + pgsrc = "" + store_page_source(idp,sp,test,pgsrc,header) return (idp['entityID'],sp['entityID'],check_time,"DISABLED",webdriver_error) try: @@ -374,22 +379,31 @@ def check_idp_response_selenium(sp,idp,test): # Support HTTP Basic Authentication unauthorized = re.search('401.(\D.|\s.)?Unauthorized', pgsrc, re.IGNORECASE) if (unauthorized): - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\n[SP] {sp['entityID']} - 401 UNAUTHORIZED FOUND" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\n[SP] {sp['entityID']} - 401 UNAUTHORIZED FOUND\n\n[PAGE_SOURCE]\n" + else: + header = f"<h1>SP {sp['entityID']} - 401 UNAUTHORIZED FOUND</h1><h2>[PAGE_SOURCE]</h2>" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"OK",webdriver_error) metadata_not_found = re.search(e_p.METADATAPATTERN, pgsrc, re.IGNORECASE) if (metadata_not_found): - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\n[SP] {sp['entityID']} - METADATA NOT FOUND" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\n[SP] {sp['entityID']} - METADATA NOT FOUND\n[PAGE_SOURCE]\n" + else: + header = f"<h1>SP {sp['entityID']} - METADATA NOT FOUND</h1><h2>[PAGE_SOURCE]</h2>" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"No-eduGAIN-Metadata",webdriver_error) idp_error = re.search(e_p.IDPERROR, pgsrc, re.IGNORECASE) if (idp_error): - if 
(test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\n[SP] {sp['entityID']} - IDP ERROR" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\n[SP] {sp['entityID']} - IDP ERROR\n[PAGE_SOURCE]\n" + else: + header = f"<h1>SP {sp['entityID']} - IDP Error</h1><h2>[PAGE_SOURCE]</h2>" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"IdP-Error",webdriver_error) @@ -399,8 +413,7 @@ def check_idp_response_selenium(sp,idp,test): # If meet <iframe> follow all iframes if ('<iframe' in pgsrc): - pwd_regexp = e_p.PASSWORDPATTERN - pwd_found = re.search(pwd_regexp,pgsrc, re.IGNORECASE) + pwd_found = re.search(e_p.PASSWORDPATTERN, pgsrc, re.IGNORECASE) if (not pwd_found): follow_all_nested_iframes(driver) @@ -408,8 +421,11 @@ def check_idp_response_selenium(sp,idp,test): EC.presence_of_element_located((By.XPATH,e_p.XPATH_CHECK_PATTERN)) ) - if (test): pgsrc = f"\n[SP] {sp['entityID']} - [IDP] {idp['entityID']} - OK" - stored = store_page_source(driver.page_source,idp,sp,test) + if (test): + pgsrc = f"\n[SP] {sp['entityID']} - [IDP] {idp['entityID']} - OK" + else: + pgsrc = driver.page_source + stored = store_page_source(idp,sp,test,pgsrc) if (stored): return (idp['entityID'],sp['entityID'],check_time,"OK",webdriver_error) @@ -422,10 +438,15 @@ def check_idp_response_selenium(sp,idp,test): input_xpath_found = driver.find_element(By.XPATH, e_p.XPATH_CHECK_PATTERN) except NoSuchElementException as e: + exception_msg = "" + # This IF is for those IdP that doesn't consuming the eduGAIN metadata and reaching Timeout if (metadata_not_found): - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\n[SP] {sp['entityID']} - METADATA NOT FOUND" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\n[SP] {sp['entityID']} - METADATA NOT FOUND\n[PAGE_SOURCE]\n" + else: + header = f"<h1>SP {sp['entityID']} - METADATA NOT FOUND</h1><h2>[PAGE_SOURCE]</h2>" + stored = 
store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"No-eduGAIN-Metadata",webdriver_error) else: @@ -433,52 +454,89 @@ def check_idp_response_selenium(sp,idp,test): response = requests.get(f"{driver.current_url}", timeout=e_p.ECCS_REQUESTSTIMEOUT) if (response.status_code == 401): - if (test): pgsrc = f"\n[PAGE_SOURCE]\nHTTP Basic Authentication\n[URL]{driver.current_url} - 401 STATUS CODE FOUND" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\nHTTP Basic Authentication found\n[URL]{driver.current_url} - 401 STATUS CODE FOUND" + else: + header = f"<h1>401 HTTP Basic Authentication found</h1><h2>[PAGE_SOURCE]</h2>" + pgsrc = "" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"OK",webdriver_error) if (response.status_code == 403): - if (test): pgsrc = f"\n[PAGE_SOURCE]\nForbidden\n[URL]{driver.current_url} - 403 STATUS CODE FOUND" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\nForbidden found\n[URL]{driver.current_url} - 403 STATUS CODE FOUND" + else: + header = f"<h1>403 Forbidden found</h1><h2>[PAGE_SOURCE]</h2>" + pgsrc = "" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"IdP-Error",webdriver_error) - except: + + except requests.exceptions.SSLError as e: + if ('unable to get local issuer certificate' not in str(e)): + if (test): + header = f"\nAn SSL Error occurred while opening {driver.current_url}:\n\n{e}\n\nCheck it on SSL Labs: https://www.ssllabs.com/ssltest/analyze.html?d={fqdn_idp}\n[PAGE_SOURCE]\n" + else: + header = f"<h1>SSL ERROR</h1><h2>An SSL error occurred for the server {fqdn_idp}:</h2><p>{e}</p><p>Check it on SSL Labs: <a href='https://www.ssllabs.com/ssltest/analyze.html?d={fqdn_idp}'>Click Here</a></p><h3>[PAGE_SOURCE]</h3>" + pgsrc = "" + store_page_source(idp,sp,test,pgsrc,header) + 
return (idp['entityID'],sp['entityID'],check_time,"SSL-Error",webdriver_error) + else: + pass + + except Exception as e: + exception_msg = e pass # ignore all requests exceptions # IdPs that do not show a Metadata error after reaching the Timeout and that raise an Exception on the "request" if (pgsrc != "<html><head></head><body></body></html>" or pgsrc != ""): - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\nUnable-To-Check: ECCS can't check the IdP login." - else: pgsrc = f"<h1>Unable To Check - ECCS can't check the IdP login</h1><h2>IDP LOGIN PAGE SOURCE:</h2><br/>{pgsrc}" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\nUnable-To-Check: ECCS can't check the IdP login for {sp['entityID']}.\nError Message: {exception_msg}\n[PAGE_SOURCE]\n" + else: + header = f"<h1>Unable To Check</h1><h2>ECCS can't check the IdP login for {sp['entityID']}</h2><h3>Error Message:</h3>{exception_msg}<br/><h3>[PAGE SOURCE]</h3>" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"Unable-To-Check",webdriver_error) else: - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\nTimeout: No valid login form loaded in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds." 
- else: pgsrc = f"<h1>Timeout - No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds.</h1>" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\nTimeout: No valid login form loaded in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds.\n[PAGE_SOURCE]\n" + else: + header = f"<h1>Timeout</h1><h2>No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds.</h2><h3>[PAGE_SOURCE]</h3>" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"Timeout",webdriver_error) + # Exceptions that are not "NoSuchElementExceptions" except e: - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc}\nTimeout: No valid login form loaded in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds." - stored = store_page_source(f"<h1>Timeout - No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds.</h1><br/><p>{pgsrc}</p>",idp,sp,test) + if (test): + header = f"\nTimeout: No valid login form loaded in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds.\n[PAGE_SOURCE]\n{pgsrc}" + else: + header = f"<h1>Timeout - No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds.</h1><h2>[PAGE_SOURCE]</h2>" + pgsrc = "" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"Timeout",webdriver_error) # input_xpath has been found # This IF is for those IdPs that Timeout is caused by an image or other that do not prevent the Login process. 
- if (test): pgsrc = f"\n[PAGE_SOURCE]\n{pgsrc} - Timeout but OK" - stored = store_page_source(pgsrc,idp,sp,test) + if (test): + header = f"\nTimeout but IdP Login found\n[PAGE_SOURCE]\n" + else: + header = f"<h1>Timeout due to a media - But IdP Login is provided.</h1><h2>[PAGE_SOURCE]</h2>" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"OK",webdriver_error) except WebDriverException as e: error = e.__dict__['msg'].split('(')[0].rstrip() - if (test): pgsrc = f"\nA Connection error occurred while opening {generate_login_url(sp['entityID'], sp['http_post_acs_location'], idp['Location'])}:\n\n{error}" - else: pgsrc = f"<h1>CONNECTION ERROR</h1><h2>A Connection error occurred while opening <a href='{generate_login_url(sp['entityID'], sp['http_post_acs_location'], idp['Location'])}'>SAML Request URL</a>:</h2><p>{error}</p>" + if (test): + header = f"\nA Connection error occurred while opening {generate_login_url(sp['entityID'], sp['http_post_acs_location'], idp['Location'])}:\n\n{error}" + else: + header = f"<h1>CONNECTION ERROR</h1><h2>A Connection error occurred while opening <a href='{generate_login_url(sp['entityID'], sp['http_post_acs_location'], idp['Location'])}'>SAML Request URL</a>:</h2><p>{error}</p>" webdriver_error = 1 - stored = store_page_source(pgsrc,idp,sp,test) + pgsrc = "" + stored = store_page_source(idp,sp,test,pgsrc,header) if (stored): return (idp['entityID'],sp['entityID'],check_time,"Connection-Error",webdriver_error)