From 03a25e6fe83820d771b1efefa18730cfde31f699 Mon Sep 17 00:00:00 2001
From: Marco Malavolti <marco.malavolti@gmail.com>
Date: Wed, 13 Oct 2021 11:21:09 +0200
Subject: [PATCH] Fixed issue #6 and #7

---
 .gitlab/issue_templates/bug.md             |  3 ---
 .gitlab/issue_templates/feature_request.md |  2 +-
 README.md                                  | 13 ++++++++++---
 eccs_properties.py                         |  2 +-
 utils.py                                   | 11 +++++++++++
 5 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/.gitlab/issue_templates/bug.md b/.gitlab/issue_templates/bug.md
index 2d72dcd..776c685 100644
--- a/.gitlab/issue_templates/bug.md
+++ b/.gitlab/issue_templates/bug.md
@@ -12,9 +12,6 @@ and verify that the issue you're about to submit isn't a duplicate.
 
 <!-- Summarize the bug encountered concisely. -->
 
-### Context
-<!-- Describe what are the benefits that the issue brings to other users -->
-
 ### Steps to reproduce
 
 <!-- Describe how the issue can be reproduced - this is very important. Please use an ordered list. -->
diff --git a/.gitlab/issue_templates/feature_request.md b/.gitlab/issue_templates/feature_request.md
index a3dcab4..6892af3 100644
--- a/.gitlab/issue_templates/feature_request.md
+++ b/.gitlab/issue_templates/feature_request.md
@@ -20,4 +20,4 @@ and verify that the issue you're about to submit isn't a duplicate.
 
 (If you can, link to the line of code that might be responsible for the problem)
 
-/label ~feature
+/label ~"feature-request"
diff --git a/README.md b/README.md
index 289b4ec..fa2944c 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,11 @@ User-agent: ECCS
 Disallow: /
 ```
 
+If an IdP is not able to create its own `robots.txt` file under the web root directory, its check can be disabled by adding an entry to the `IDPS_DISABLED_DICT` dictionary in `eccs_properties.py`, in the form:
+
+`'<idp-entity-id>':'<eccs-check-disabling-reason>'`
+
+
 # On-line interface
 
 The test eduGAIN Connectivity Check web pages is available at: https://technical-test.edugain.org/eccs
@@ -71,18 +76,19 @@ The tool uses following status for IdPs:
 
 * ERROR (red):
   * The IdP's response contains an HTTP Error or the web page returned does not look like a login page.
-    * **Invalid-Form**: considers those IdPs that do not load a standard username/password login page and do not return messages like "No return endpoint available for relying party" or "No metadata found for relying party".
+    * **Invalid-Form**: considers those IdPs that do not load a standard username/password login page and do not return messages like "*No return endpoint available for relying party*" or "*No metadata found for relying party*".
     * **Timeout**: considers those IdPs that do not load a standard username/password login page within 60 seconds.
     * **Connection-Error**: considers those IdPs that are not reachable due to a connection problem. View the "Page Source" value to discover which problem the IdP has. 
+    * **IdP-Error**: considers those IdPs whose returned web page does not contain a Login Form and reports an unspecified error such as "*An error occurred*". This has been seen on Microsoft ADFS-based IdPs.
   * The IdP most likely does not consume the eduGAIN metadata correctly.
-    A typical case that falls into this category is when an IdP returns a message "No return endpoint available for relying party" or "No metadata found for relying party":
+    A typical case that falls into this category is when an IdP returns a message "*No return endpoint available for relying party*" or "*No metadata found for relying party*":
     * **No-eduGAIN-Metadata**
   * The IdP has a problem with its SSL certificate:
     * **SSL-Error**
 * OK (green):
   * The IdP most likely correctly consumes eduGAIN metadata and returns a valid login page. This is no guarantee that login on this IdP works for all eduGAIN services but if the check is passed for an IdP, this is probable.
 * DISABLED (white)
-  * The IdP is excluded because it cannot be checked reliably. The "Page Source" column, when an entity is disabled, shows the reason of the disabling.
+  * The IdP is excluded because it cannot be checked reliably. When an entity is disabled, the "*Page Source*" column shows the reason why it was disabled.
 
 # Requirements Hardware
 
@@ -325,6 +331,7 @@ To perform a restart after an API change use the following command:
     * `Timeout`
     * `Invalid-Form`
     * `Connection-Error`
+    * `IdP-Error`
     * `No-eduGAIN-Metadata`
     * `SSL-Error`
     * `DISABLED`
diff --git a/eccs_properties.py b/eccs_properties.py
index 26b9cb4..eb63097 100644
--- a/eccs_properties.py
+++ b/eccs_properties.py
@@ -48,8 +48,8 @@ ECCS_SPS = [
 ROBOTS_USER_AGENT = "ECCS/2.0 (+https://technical.edugain.org/eccs)"
 
 # PATTERNS
+IDPERROR = "error.occurred"
 METADATAPATTERN = "Unable.to.locate(\sissuer.in|).metadata(\sfor|)|no.metadata.found|profile.is.not.configured.for.relying.party|Cannot.locate.entity|fail.to.load.unknown.provider|does.not.recognise.the.service|unable.to.load.provider|Nous.n'avons.pas.pu.(charg|charger).le.fournisseur.de service|Metadata.not.found|application.you.have.accessed.is.not.registered.for.use.with.this.service|Message.did.not.meet.security.requirements|Unsupported.Request|Not.Authorized|METADATANOTFOUND|Unknown.login.requester|is.unspecified.or.unsupported|Unknown.service.provider|Richiesta.non.supportata|Metadati.non.trovati|untrusted.provider|Unregistered.Service|Unsupported.request|UNHANDLEDEXCEPTION|Metadata.*.expired|Could.not.find.any.*.metadata.*.for"
-USERNAMEPATTERN = '<input[\s]+[^>]*((type=\s*[\'"](text|email)[\'"]|user)|(name=\s*[\'"](name)[\'"]))[^>]*>'
 PASSWORDPATTERN = '<input[\s]+[^>]*(type=\s*[\'"]password[\'"]|password)[^>]*>'
 REFUSEDPATTERN = '(^http)(.*\.png$)|(.*\.css$)|(.*\.js$)|(.*\.gif$)|(.*\.svg$)|(.*\.jpg$)'
 
diff --git a/utils.py b/utils.py
index cc89665..9a0ee08 100644
--- a/utils.py
+++ b/utils.py
@@ -273,6 +273,8 @@ def check_idp_response_selenium(sp,idp,test):
 
        metadata_not_found = re.search(e_p.METADATAPATTERN,driver.page_source, re.I)
 
+       idp_error = re.search(e_p.IDPERROR,driver.page_source, re.I)
+
        if (metadata_not_found):
           if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\n[WAYFLESS URL]{wayfless_url} - METADATA NOT FOUND"
           else: pgsrc = driver.page_source
@@ -280,10 +282,19 @@ def check_idp_response_selenium(sp,idp,test):
           if (stored):
              return (idp['entityID'],wayfless_url,check_time,"No-eduGAIN-Metadata",webdriver_error)
 
+       if (idp_error):
+          if (test): pgsrc = f"\n[PAGE_SOURCE]\n{driver.page_source}\n[WAYFLESS URL]{wayfless_url} - IDP ERROR"
+          else: pgsrc = driver.page_source
+          stored = store_page_source(pgsrc,idp,sp,test)
+          if (stored):
+             return (idp['entityID'],wayfless_url,check_time,"IdP-Error",webdriver_error)
+
        # If meet <iframe> follow all iframes
        if ('<iframe' in driver.page_source):
           follow_all_nested_iframes(driver)
 
+       driver.refresh()
+
        WebDriverWait(driver, e_p.ECCS_SELENIUMPAGELOADTIMEOUT).until(
           EC.presence_of_element_located((By.XPATH,'//input[@type="password"]'))
        )
-- 
GitLab