From 47220e855c2ec81779ed0000f464053a7e58cf4b Mon Sep 17 00:00:00 2001 From: Marco Malavolti <marco.malavolti@gmail.com> Date: Fri, 22 Oct 2021 17:01:41 +0200 Subject: [PATCH] Improved ECCS by fixing <iframe> check --- eccs_properties.py.template | 8 ++++---- utils.py | 17 ++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/eccs_properties.py.template b/eccs_properties.py.template index 28c9b55..0f5ad49 100644 --- a/eccs_properties.py.template +++ b/eccs_properties.py.template @@ -48,11 +48,11 @@ ECCS_SPS = [ ROBOTS_USER_AGENT = "ECCS/2.0 (+https://technical.edugain.org/eccs)" # PATTERNS -JAVASCRIPT = "x-my-okta-version" -IDPERROR = "error.has.occurred|error.occurred|Error.when.processing.authentication.request|The.system.encountered.an.error|Internal.Server.Error|403.Forbidden|Service.Unavailable|InvalidProfileConfiguration|Unexpected.System.Error|404.not.found|404.-.not.found|OpenAthens:.404|On.tapahtunut.virhe|Unhandled.exception|Bad.Gateway|Page.Not.Found|Δεν.επιτρέπεται.η.πρόσβαση|temporary.error|temporarily.unavailable|License.error|n'est.pas.gérée" -METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service|Unsupported.request|UNHANDLEDEXCEPTION|Metadata.*.expired|Could.not.find.any.*.metadata.*.for|不支持的请求|l'application.n'est.pas.enregistrée|Requisição.não.suportada|トされていないリクエスト|is.not.allowed|Authorization.Failure|Pedido.não.suportado" +JAVASCRIPT = '"x-my-okta-version"' +IDPERROR = "error\s(has\s)?occur(r)?ed|Error\swhen\sprocessing\s(the\s)?authentication\srequest|The.(server|system).encountered.an.error|Internal.Server.Error|403.Forbidden|Service.Unavailable|InvalidProfileConfiguration|Unexpected.System.Error|404\s(.\s)?[Nn]ot.[Ff]ound|OpenAthens:.404|On.tapahtunut.virhe|Unhandled.exception|Bad.Gateway|Page.Not.Found|Δεν.επιτρέπεται.η.πρόσβαση|temporary\s(unavailable|error).?|License.error|n'est.pas.gérée|Invalid.Request|Erreur.!|Please.report.this.error.to|该网站无法访问" +METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service|Unsupported.request|UNHANDLEDEXCEPTION|Metadata.*.expired|Could.not.find.any.*.metadata.*.for|不支持的请求|l'application.n'est.pas.enregistrée|Requisição.não.suportada|トされていないリクエスト|is.not.allowed|Authorization.Failure|Pedido.não.suportado|Nicht.unterstützte.Anfrage" PASSWORDPATTERN = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>' -REFUSEDPATTERN = '(^http)(.*\.png$)|(.*\.css$)|(.*\.js$)|(.*\.gif$)|(.*\.svg$)|(.*\.jpg$)' +USERNAMEPATTERN = '<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>' # { 'reg_auth':'reason' } FEDS_DISABLED_DICT = { diff --git a/utils.py b/utils.py index cd3e83e..301bf90 100644 --- a/utils.py +++ b/utils.py @@ -200,9 +200,6 @@ def follow_all_nested_iframes(driver): # ECCS Check made by Selenium def check_idp_response_selenium(sp,idp,test): - # Disable SSL requests warning messages - #requests.packages.urllib3.disable_warnings() - # Common variables fqdn_idp = get_label(idp['Location']) wayfless_url = f"{sp}{idp['entityID']}" @@ -301,14 +298,18 @@ def check_idp_response_selenium(sp,idp,test): # If meet <iframe> follow all iframes if ('<iframe' in driver.page_source): - follow_all_nested_iframes(driver) + pwd_regexp = e_p.PASSWORDPATTERN + pwd_found = re.search(pwd_regexp,driver.page_source, re.I) + if (not pwd_found): + follow_all_nested_iframes(driver) load_js = re.search(e_p.JAVASCRIPT, driver.page_source, re.I) if (load_js): driver.refresh() + input_xpath = '//input[@type="password"]|//input[@type="Password"]|//input[@type="text"]|//input[@type="email"]|//input[@type="user"]|//input[@name="name"]' WebDriverWait(driver, e_p.ECCS_SELENIUMPAGELOADTIMEOUT).until( - EC.presence_of_element_located((By.XPATH,'//input[@type="password"]|//input[@type="Password"]')) + EC.presence_of_element_located((By.XPATH,input_xpath)) ) if (test): pgsrc = f"\n[WAYFLESS_URL]\n{wayfless_url} - OK" @@ -322,7 +323,8 @@ def check_idp_response_selenium(sp,idp,test): metadata_not_found = re.search(e_p.METADATAPATTERN,driver.page_source, re.I) try: - input_password_found = driver.find_element(By.XPATH,'//input[@type="password"]|//input[@type="Password"]') + input_xpath = '//input[@type="password"]|//input[@type="Password"]|//input[@type="text"]|//input[@type="email"]|//input[@type="user"]|//input[@name="name"]' + input_password_found = driver.find_element(By.XPATH, input_xpath) except NoSuchElementException as e: # This IF is for those IdP that doesn't consuming the eduGAIN metadata and reaching Timeout @@ -353,7 +355,7 @@ def check_idp_response_selenium(sp,idp,test): pass # ignore all requests exceptions if (driver.page_source != "<html><head></head><body></body></html>"): - if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\nInvalid-Form: No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds" + if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\nInvalid-Form: No valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds." else: pgsrc = f"<h1>Invalid Form: no valid login form found in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds</h1><h2>PAGE SOURCE:</h2><br/>{driver.page_source}" stored = store_page_source(pgsrc,idp,sp,test) if (stored): @@ -364,6 +366,7 @@ def check_idp_response_selenium(sp,idp,test): stored = store_page_source(pgsrc,idp,sp,test) if (stored): return (idp['entityID'],wayfless_url,check_time,"Timeout",webdriver_error) + # Exceptions that are not "NoSuchElementExceptions" except e: if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\nTimeout: No valid login form loaded in {e_p.ECCS_SELENIUMPAGELOADTIMEOUT} seconds." else: pgsrc = driver.page_source -- GitLab