From 47220e855c2ec81779ed0000f464053a7e58cf4b Mon Sep 17 00:00:00 2001
From: Marco Malavolti <marco.malavolti@gmail.com>
Date: Fri, 22 Oct 2021 17:01:41 +0200
Subject: [PATCH] Improved ECCS by fixing <iframe> check

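---
Review notes (placed below the "---" marker, so not part of the commit message):
- IDPERROR: the new "temporary\s(unavailable|error)" alternative no longer
  matches "temporarily unavailable", which the removed
  "temporarily.unavailable" alternative did. Using
  "temporar(y|ily)\s(unavailable|error)" would keep both spellings.
- REFUSEDPATTERN is removed by this patch; please confirm that nothing else
  still reads e_p.REFUSEDPATTERN.
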
 eccs_properties.py.template |  8 ++++----
 utils.py                    | 17 ++++++++++-------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/eccs_properties.py.template b/eccs_properties.py.template
index 28c9b55..0f5ad49 100644
--- a/eccs_properties.py.template
+++ b/eccs_properties.py.template
@@ -48,11 +48,11 @@ ECCS_SPS = [
 ROBOTS_USER_AGENT = "ECCS/2.0 (+https://technical.edugain.org/eccs)"
 
 # PATTERNS
-JAVASCRIPT = "x-my-okta-version"
-IDPERROR = "error.has.occurred|error.occurred|Error.when.processing.authentication.request|The.system.encountered.an.error|Internal.Server.Error|403.Forbidden|Service.Unavailable|InvalidProfileConfiguration|Unexpected.System.Error|404.not.found|404.-.not.found|OpenAthens:.404|On.tapahtunut.virhe|Unhandled.exception|Bad.Gateway|Page.Not.Found|Δεν.επιτρέπεται.η.πρόσβαση|temporary.error|temporarily.unavailable|License.error|n'est.pas.gérée"
-METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service|Unsupported.request|UNHANDLEDEXCEPTION|Metadata.*.expired|Could.not.find.any.*.metadata.*.for|不支持的请求|l'application.n'est.pas.enregistrée|Requisição.não.suportada|トされていないリクエスト|is.not.allowed|Authorization.Failure|Pedido.não.suportado"
+JAVASCRIPT = '"x-my-okta-version"'
+IDPERROR = "error\s(has\s)?occur(r)?ed|Error\swhen\sprocessing\s(the\s)?authentication\srequest|The.(server|system).encountered.an.error|Internal.Server.Error|403.Forbidden|Service.Unavailable|InvalidProfileConfiguration|Unexpected.System.Error|404\s(.\s)?[Nn]ot.[Ff]ound|OpenAthens:.404|On.tapahtunut.virhe|Unhandled.exception|Bad.Gateway|Page.Not.Found|Δεν.επιτρέπεται.η.πρόσβαση|temporary\s(unavailable|error).?|License.error|n'est.pas.gérée|Invalid.Request|Erreur.!|Please.report.this.error.to|该网站无法访问"
+METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service|Unsupported.request|UNHANDLEDEXCEPTION|Metadata.*.expired|Could.not.find.any.*.metadata.*.for|不支持的请求|l'application.n'est.pas.enregistrée|Requisição.não.suportada|トされていないリクエスト|is.not.allowed|Authorization.Failure|Pedido.não.suportado|Nicht.unterstützte.Anfrage"
 PASSWORDPATTERN = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>'
-REFUSEDPATTERN = '(^http)(.*\.png$)|(.*\.css$)|(.*\.js$)|(.*\.gif$)|(.*\.svg$)|(.*\.jpg$)'
+USERNAMEPATTERN = '<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>'
 
 # { 'reg_auth':'reason' }
 FEDS_DISABLED_DICT = {
diff --git a/utils.py b/utils.py
index cd3e83e..301bf90 100644
--- a/utils.py
+++ b/utils.py
@@ -200,9 +200,6 @@ def follow_all_nested_iframes(driver):
 # ECCS Check made by Selenium
 def check_idp_response_selenium(sp,idp,test):
 
-    # Disable SSL requests warning messages
-    #requests.packages.urllib3.disable_warnings()
-
     # Common variables
     fqdn_idp = get_label(idp['Location'])
     wayfless_url = f"{sp}{idp['entityID']}"
@@ -301,14 +298,18 @@ def check_idp_response_selenium(sp,idp,test):
 
        # If meet <iframe> follow all iframes
        if ('<iframe' in driver.page_source):
-          follow_all_nested_iframes(driver)
+          pwd_regexp = e_p.PASSWORDPATTERN
+          pwd_found = re.search(pwd_regexp,driver.page_source, re.I)
+          if (not pwd_found):
+             follow_all_nested_iframes(driver)
 
        load_js = re.search(e_p.JAVASCRIPT, driver.page_source, re.I)
        if (load_js):
           driver.refresh()
 
+       input_xpath = '//input[@type="password"]|//input[@type="Password"]|//input[@type="text"]|//input[@type="email"]|//input[@type="user"]|//input[@name="name"]'
        WebDriverWait(driver, e_p.ECCS_SELENIUMPAGELOADTIMEOUT).until(
-          EC.presence_of_element_located((By.XPATH,'//input[@type="password"]|//input[@type="Password"]'))
+          EC.presence_of_element_located((By.XPATH,input_xpath))
        )
 
        if (test): pgsrc = f"\n[WAYFLESS_URL]\n{wayfless_url} - OK"
@@ -322,7 +323,8 @@ def check_idp_response_selenium(sp,idp,test):
        metadata_not_found = re.search(e_p.METADATAPATTERN,driver.page_source, re.I)
 
        try:
-          input_password_found = driver.find_element(By.XPATH,'//input[@type="password"]|//input[@type="Password"]')
+          input_xpath = '//input[@type="password"]|//input[@type="Password"]|//input[@type="text"]|//input[@type="email"]|//input[@type="user"]|//input[@name="name"]'
+          input_password_found = driver.find_element(By.XPATH, input_xpath)
 
        except NoSuchElementException as e:
           # This IF is for those IdP that doesn't consuming the eduGAIN metadata and reaching Timeout
@@ -353,7 +355,7 @@ def check_idp_response_selenium(sp,idp,test):
                 pass   # ignore all requests exceptions
 
              if (driver.page_source != "<html><head></head><body></body></html>"):
-                if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\nInvalid-Form: No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds"
+                if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\nInvalid-Form: No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds."
                 else: pgsrc = f"<h1>Invalid Form: no valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds</h1><h2>PAGE SOURCE:</h2><br/>{driver.page_source}"
                 stored = store_page_source(pgsrc,idp,sp,test)
                 if (stored):
@@ -364,6 +366,7 @@ def check_idp_response_selenium(sp,idp,test):
                 stored = store_page_source(pgsrc,idp,sp,test)
                 if (stored):
                    return (idp['entityID'],wayfless_url,check_time,"Timeout",webdriver_error)
+       # Any exception other than NoSuchElementException (NOTE(review): "except e" below looks wrong; "except Exception" was probably meant - confirm)
        except e:
           if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\nTimeout: No valid login form loaded in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds."
           else: pgsrc = driver.page_source
-- 
GitLab