From ce7e223cb0f188cb35d67b811385bdadd4f0e99a Mon Sep 17 00:00:00 2001 From: Marco Malavolti <marco.malavolti@gmail.com> Date: Thu, 29 Jul 2021 20:08:30 +0200 Subject: [PATCH] Improved Selenium check --- eccs2properties.py | 2 +- utils.py | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/eccs2properties.py b/eccs2properties.py index cd2f719..c7162e4 100644 --- a/eccs2properties.py +++ b/eccs2properties.py @@ -48,7 +48,7 @@ ECCS2SPS = [ ROBOTS_USER_AGENT = "ECCS/2.0 (+https://technical-test.edugain.org/eccs2)" # PATTERNS -METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service" +METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service|Unsupported.request|UNHANDLEDEXCEPTION|Metadata.*.expired|Could.not.find.any.*.metadata.*.for" USERNAMEPATTERN = 
'<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>' PASSWORDPATTERN = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>' REFUSEDPATTERN = '(^http)(.*\.png$)|(.*\.css$)|(.*\.js$)|(.*\.gif$)|(.*\.svg$)|(.*\.jpg$)' diff --git a/utils.py b/utils.py index 2d9a404..8536933 100644 --- a/utils.py +++ b/utils.py @@ -160,7 +160,7 @@ def get_driver_selenium(idp=None,sp=None,debugSelenium=False): # Configure Web-driver # https://peter.sh/experiments/chromium-command-line-switches/ chrome_options = Options() - chrome_options.page_load_strategy = 'eager' + chrome_options.page_load_strategy = 'normal' chrome_options.add_argument('--start-in-incognito') chrome_options.add_argument('--headless') @@ -287,8 +287,17 @@ def check_idp_response_selenium(sp,idp,test): check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z' driver.get(wayfless_url) + metadata_not_found = re.search(e2p.METADATAPATTERN,driver.page_source, re.I) + + if (metadata_not_found): + if (test): pgsrc = f"\n[PAGE_SOURCE]\n{page_source}\n[WAYFLESS URL]{wayfless_url} - METADATA NOT FOUND" + else: pgsrc = driver.page_source + stored = store_page_source(pgsrc,idp,sp,test) + if (stored): + return (idp['entityID'],wayfless_url,check_time,http_code,"No-eduGAIN-Metadata",webdriver_error) + # If meet <iframe> load it - if ("<iframe" in driver.page_source): + if ('<iframe' in driver.page_source): seq = driver.find_elements_by_tag_name('iframe') #switching between the iframes based on index @@ -326,18 +335,28 @@ def check_idp_response_selenium(sp,idp,test): metadata_not_found = re.search(e2p.METADATAPATTERN,page_source, re.I) + # This IF is for those IdPs that do not consume the eduGAIN metadata and reach the Timeout if (metadata_not_found): if (test): pgsrc = f"\n[PAGE_SOURCE]\n{page_source}\n[WAYFLESS URL]{wayfless_url} - METADATA NOT FOUND" else: pgsrc = page_source stored = store_page_source(pgsrc,idp,sp,test) if (stored): return 
(idp['entityID'],wayfless_url,check_time,http_code,"No-eduGAIN-Metadata",webdriver_error) + # This IF is for those IdPs that reach the Timeout without any content in their page source elif(page_source == "<html><head></head><body></body></html>"): if (test): pgsrc = f"\nTimeout: No valid login form loaded in {e2p.ECCS2SELENIUMPAGELOADTIMEOUT} seconds" else: pgsrc = page_source - stored = store_page_source(pgsrc,idp,sp,test) + stored = store_page_source(f"<h1>Timeout - No valid login form loaded into {e2p.ECCS2SELENIUMPAGELOADTIMEOUT} seconds.",idp,sp,test) if (stored): return (idp['entityID'],wayfless_url,check_time,"(failed)","Timeout",webdriver_error) + # This IF is for those IdPs whose Timeout is caused by an image or other resource that does not prevent the Login process. + elif (EC.presence_of_element_located((By.XPATH,'//input[@type="password"]'))): + if (test): pgsrc = f"\n[WAYFLESS URL]{wayfless_url} - Timeout but OK" + else: pgsrc = page_source + stored = store_page_source(pgsrc,idp,sp,test) + if (stored): + return (idp['entityID'],wayfless_url,check_time,http_code,"OK",webdriver_error) + # Final else is for all the other cases else: if (test): pgsrc = f"\nInvalid-Form: No valid login form found in {e2p.ECCS2SELENIUMPAGELOADTIMEOUT} seconds" else: pgsrc = page_source -- GitLab