From dc666b5f04d62db32f80d981e7015e83e449b04d Mon Sep 17 00:00:00 2001
From: Marco Malavolti <marco.malavolti@gmail.com>
Date: Fri, 19 Jun 2020 14:09:59 +0200
Subject: [PATCH] Added timeout to requests

---
 api.py             |   2 -
 eccs2.py           | 136 ++++++++++++++++-----------------------------
 eccs2properties.py |   3 +
 runEccs2.py        |  52 +----------------
 utils.py           | 131 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 183 insertions(+), 141 deletions(-)
 create mode 100644 utils.py

diff --git a/api.py b/api.py
index 9ea908e..1236b98 100755
--- a/api.py
+++ b/api.py
@@ -253,7 +253,6 @@ class EccsResults(Resource):
                         'status' : idp_checks_status
                     } )
           elif (status):
-              app.logger.info("Results for status '%s'." % status)
               if (status == idp_checks_status):
                  result.append( 
                     { 
@@ -276,7 +275,6 @@ class EccsResults(Resource):
                         'status' : idp_checks_status
                     } )
           else:
-             app.logger.info("All checks.")
              result.append( 
              { 
                  'displayName' : idp_displayname,
diff --git a/eccs2.py b/eccs2.py
index 721c8d9..dd2e002 100755
--- a/eccs2.py
+++ b/eccs2.py
@@ -2,24 +2,20 @@
 
 import argparse
 import json
-import logging
-import os
-import signal
 import re
 import requests
+import time
 
 from datetime import date
-from eccs2properties import ECCS2LOGSDIR, ECCS2RESULTSLOG, ECCS2CHECKSLOG, ECCS2SELENIUMLOGDIR, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SELENIUMPAGELOADTIMEOUT, ECCS2SELENIUMSCRIPTTIMEOUT, ECCS2SPS
-from selenium import webdriver
+from eccs2properties import ECCS2LOGSDIR, ECCS2RESULTSLOG, ECCS2CHECKSLOG, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SPS, ECCS2SELENIUMDEBUG
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support.ui import Select, WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.remote.remote_connection import LOGGER
 from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException, UnexpectedAlertPresentException
 from urllib3.exceptions import MaxRetryError
 from urllib3.util import parse_url
-
+from utils import getLogger, getIdPContacts, getDriver
 
 """
   This script use Selenium and Chromium to select the IdP to check from a Shibboleth SP with the Shibboleth Embedded Discovery Service installed and configured to answer to all eduGAIN IdPs.
@@ -27,28 +23,15 @@ from urllib3.util import parse_url
   The check will be passed when both SPs will return the authentication page of the IdP checked.
 """
 
-#def checkIdP(sp,idp,logger,driver):
 def checkIdP(sp,idp,logger):
    # Chromedriver MUST be instanced here to avoid problems with SESSION
 
    # Disable SSL requests warning messages
    requests.packages.urllib3.disable_warnings()
 
-   # Configure Web-driver
-   chrome_options = webdriver.ChromeOptions()
-   chrome_options.add_argument('--headless')
-   chrome_options.add_argument('--no-sandbox')
-   chrome_options.add_argument('--disable-dev-shm-usage')
-   chrome_options.add_argument('--ignore-certificate-errors')
-
-   driver = webdriver.Chrome('chromedriver', options=chrome_options)
-
-   # For DEBUG only (By default ChromeDriver logs only warnings/errors to stderr. When debugging issues, it is helpful to enable more verbose logging.)
-   #driver = webdriver.Chrome('chromedriver', options=chrome_options,  service_args=['--verbose', '--log-path=%s/%s.log' % (ECCS2SELENIUMLOGDIR, parse_url(idp['entityID'])[2])])
-
-   # Configure timeouts
-   driver.set_page_load_timeout("%d" % ECCS2SELENIUMPAGELOADTIMEOUT)
-   driver.set_script_timeout("%d" % ECCS2SELENIUMSCRIPTTIMEOUT)
+   debugSelenium = ECCS2SELENIUMDEBUG
+   fqdn_idp = parse_url(idp['entityID'])[2]
+   driver = getDriver(fqdn_idp,debugSelenium)
 
    # Configure Blacklists
    federation_blacklist = FEDS_BLACKLIST
@@ -68,8 +51,7 @@ def checkIdP(sp,idp,logger):
       element = WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.ID,"idpSelectInput"))) 
       element.send_keys(idp['entityID'] + Keys.ENTER)
       page_source = driver.page_source
-      status_code = requests.get(driver.current_url, verify=False).status_code
-      driver.quit()
+      samlrequest_url = driver.current_url
 
    except TimeoutException as e:
      logger.info("%s;%s;999;Timeout" % (idp['entityID'],sp))
@@ -90,24 +72,13 @@ def checkIdP(sp,idp,logger):
    except WebDriverException as e:
      print("!!! WEB DRIVER EXCEPTION - RUN AGAIN THE COMMAND!!!")
      print (e.__str__())
-     return None
-
-   except requests.exceptions.ConnectionError as e:
-     logger.info("%s;%s;000;ConnectionError" % (idp['entityID'],sp))
-     return "ERROR"
-
-   except requests.exceptions.TooManyRedirects as e:
-     logger.info("%s;%s;111;TooManyRedirects" % (idp['entityID'],sp))
-     return "ERROR"
-
-   except requests.exceptions.RequestException as e:
-     print ("!!! REQUESTS EXCEPTION !!!")
-     print (e.__str__())
+     print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      return None
 
    except Exception as e:
      print ("!!! EXCEPTION !!!")
      print (e.__str__())
+     print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
      return None
 
    finally:
@@ -122,6 +93,41 @@ def checkIdP(sp,idp,logger):
    username_found = re.search(pattern_username,page_source, re.I)
    password_found = re.search(pattern_password,page_source, re.I)
 
+   try:
+      headers = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
+      status_code = requests.get(samlrequest_url, headers=headers, verify=False, timeout=30).status_code
+
+   except requests.exceptions.ConnectionError as e:
+     #print("!!! REQUESTS STATUS CODE CONNECTION ERROR EXCEPTION !!!")
+     #print (e.__str__())
+     #print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
+     status_code = 000
+
+   except requests.exceptions.Timeout as e:
+     #print("!!! REQUESTS STATUS CODE TIMEOUT EXCEPTION !!!")
+     #print (e.__str__())
+     #print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
+     status_code = 111
+
+   except requests.exceptions.TooManyRedirects as e:
+     #print("!!! REQUESTS TOO MANY REDIRECTS EXCEPTION !!!")
+     #print (e.__str__())
+     #print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
+     status_code = 222
+
+   except requests.exceptions.RequestException as e:
+     print ("!!! REQUESTS EXCEPTION !!!")
+     print (e.__str__())
+     print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
+     status_code = 333
+
+   except Exception as e:
+     print ("!!! EXCEPTION !!!")
+     print (e.__str__())
+     print ("IdP: %s\nSP: %s" % (idp['entityID'],sp))
+     status_code = 555
+
+
    if(metadata_not_found):
       logger.info("%s;%s;%s;No-eduGAIN-Metadata" % (idp['entityID'],sp,status_code))
       return "No-eduGAIN-Metadata"
@@ -133,53 +139,7 @@ def checkIdP(sp,idp,logger):
       return "OK"
 
 
-# Use logger to produce files consumed by ECCS-2 API
-def getLogger(filename, path=".", log_level="DEBUG"):
-
-    logger = logging.getLogger(filename)
-    ch = logging.FileHandler(path + '/' + filename,'a','utf-8')
-
-    if (log_level == "DEBUG"):
-       logger.setLevel(logging.DEBUG)
-       ch.setLevel(logging.DEBUG)
-    elif (log_level == "INFO"):
-       logger.setLevel(logging.INFO)
-       ch.setLevel(logging.INFO)
-    elif (log_level == "WARN"):
-       logger.setLevel(logging.WARN)
-       ch.setLevel(logging.WARN)
-    elif (log_level == "ERROR"):
-       logger.setLevel(logging.ERROR)
-       ch.setLevel(logging.ERROR)
-    elif (log_level == "CRITICAL"):
-       logger.setLevel(logging.CRITICAL)
-       ch.setLevel(logging.CRITICAL)
-
-    formatter = logging.Formatter('%(message)s')
-    ch.setFormatter(formatter)
-    logger.addHandler(ch)
-
-    return logger
-
-
-# Return a list of email address for a specific type of contact
-def getIdPContacts(idp,contactType):
-
-   ctcList = []
-   for ctcType in idp['contacts']:
-      if (ctcType == contactType):
-         for ctc in idp['contacts'][contactType]:
-            if (ctc.get('emailOrPhone')):
-               if (ctc['emailOrPhone'].get('EmailAddress')):
-                  ctcList.append(ctc['emailOrPhone']['EmailAddress'][0])
-               else:
-                  ctcList.append('missing email')
-            else:
-               ctcList.append('missing email')
-
-   return ctcList
-
-def checkIdp(idp,sps,eccs2log,eccs2checksLog):
+def check(idp,sps,eccs2log,eccs2checksLog):
       result = []
       for sp in sps:
          resultCheck = checkIdP(sp,idp,eccs2checksLog)
@@ -236,8 +196,8 @@ def checkIdp(idp,sps,eccs2log,eccs2checksLog):
 # MAIN
 if __name__=="__main__":
 
-   eccs2log = getLogger(ECCS2RESULTSLOG, ECCS2LOGSDIR, "INFO")
-   eccs2checksLog = getLogger(ECCS2CHECKSLOG, ECCS2LOGSDIR, "INFO")
+   eccs2log = getLogger(ECCS2RESULTSLOG, ECCS2LOGSDIR, 'a', "INFO")
+   eccs2checksLog = getLogger(ECCS2CHECKSLOG, ECCS2LOGSDIR, 'a', "INFO")
 
    sps = ECCS2SPS
 
@@ -248,4 +208,4 @@ if __name__=="__main__":
 
    idp = json.loads(args.idpJson[0])
 
-   checkIdp(idp,sps,eccs2log,eccs2checksLog)
+   check(idp,sps,eccs2log,eccs2checksLog)
diff --git a/eccs2properties.py b/eccs2properties.py
index 33d2a49..67027d0 100644
--- a/eccs2properties.py
+++ b/eccs2properties.py
@@ -25,6 +25,9 @@ ECCS2STDERR = "%s/stderr.log" % ECCS2LOGSDIR
 ECCS2SELENIUMPAGELOADTIMEOUT = 30
 ECCS2SELENIUMSCRIPTTIMEOUT = 30
 
+# Selenium Debug Enable/Disable
+ECCS2SELENIUMDEBUG = True
+
 # Number of processes to run in parallel
 ECCS2NUMPROCESSES = 20
 
diff --git a/runEccs2.py b/runEccs2.py
index cf19f39..70221e0 100755
--- a/runEccs2.py
+++ b/runEccs2.py
@@ -4,62 +4,12 @@ import asyncio
 import datetime
 import eccs2properties
 import json
-import pathlib
-import requests
-import sys
 import time
+from utils import getListFeds, getListEccsIdps, getRegAuthDict, getIdpList
 
 from eccs2properties import ECCS2STDOUT, ECCS2STDERR, ECCS2DIR, ECCS2NUMPROCESSES, ECCS2LISTIDPSURL, ECCS2LISTIDPSFILE, ECCS2LISTFEDSURL, ECCS2LISTFEDSFILE 
 from subprocess import Popen,PIPE
 
-# Returns a Dict on "{ nameFed:reg_auth }"
-def getRegAuthDict(list_feds):
-   regAuth_dict = {}
-
-   for key,value in list_feds.items():
-      name = value['name']
-      reg_auth = value['reg_auth']
-
-      regAuth_dict[name] = reg_auth
-
-   return regAuth_dict
-
-
-# Returns a list of IdP for a single federation
-def getIdpList(list_eccs_idps,reg_auth):
-
-   fed_idp_list = []
-   for idp in list_eccs_idps:
-      if (idp['registrationAuthority'] == reg_auth):
-         fed_idp_list.append(idp)
-
-   return fed_idp_list
-
-
-# Returns a Python Dictionary
-def getListFeds(url, dest_file):
-   # If file does not exists... download it into the dest_file
-   path = pathlib.Path(dest_file)
-   if(path.exists() == False):
-      with open("%s" % (dest_file), mode="w+", encoding='utf-8') as f:
-         f.write(requests.get(url).text)
-
-   # then open it and work with local file
-   with open("%s" % (dest_file), mode="r", encoding='utf-8') as f:
-      return json.loads(f.read().replace("'", "&apos;"))
-
-
-# Returns a Python List
-def getListEccsIdps(url, dest_file):
-   # If file does not exists... download it into the dest_file
-   path = pathlib.Path(dest_file)
-   if(path.exists() == False):
-      with open("%s" % (dest_file), mode="w+", encoding='utf-8') as f:
-         f.write(requests.get(url).text)
-
-   # then open it and work with local file
-   with open("%s" % (dest_file), mode="r", encoding='utf-8') as f:
-      return json.loads(f.read().replace("'", "&apos;"))
 
 # Run Command
 async def run(name,queue,stdout_file,stderr_file):
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..ad16349
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3.8
+
+import json
+import logging
+import pathlib
+import requests
+
+from eccs2properties import ECCS2SELENIUMLOGDIR, ECCS2SELENIUMPAGELOADTIMEOUT, ECCS2SELENIUMSCRIPTTIMEOUT
+from selenium import webdriver
+
+
+# Build the lookup "{ federation name : registration authority }" from the federations dict
+def getRegAuthDict(list_feds):
+    # list_feds: mapping whose values are dicts carrying at least 'name' and 'reg_auth';
+    #            the outer keys are never used, so only the values are walked.
+    # Returns:   a new dict; when two federations share a name the later entry wins.
+    # Raises:    KeyError if an entry lacks 'name' or 'reg_auth'.
+    name_to_reg_auth = {
+        fed['name']: fed['reg_auth']
+        for fed in list_feds.values()
+    }
+    return name_to_reg_auth
+
+
+# Return the IdPs registered by one federation, or a copy of the whole list when no federation is given
+def getIdpList(list_eccs_idps,reg_auth=None):
+    # list_eccs_idps: iterable of IdP dicts; 'registrationAuthority' is read only when filtering.
+    # reg_auth:       registration authority to keep; any falsy value (None, "") disables the filter.
+    # Returns:        a new list, input order preserved.
+    if not reg_auth:
+        # No filter requested: hand back every IdP.
+        return list(list_eccs_idps)
+
+    # Keep only the IdPs whose registration authority equals the requested one.
+    return [idp for idp in list_eccs_idps if idp['registrationAuthority'] == reg_auth]
+
+
+# Return the federations parsed from the JSON cached in dest_file; when that file does not exist it is first downloaded from url (requests exceptions, Timeout included, propagate to the caller)
+def getListFeds(url, dest_file):
+    # Download before opening dest_file (a failed request must not leave an empty cache file) and never wait forever
+    if (not pathlib.Path(dest_file).exists()):
+       content = requests.get(url, timeout=30).text
+       with open("%s" % (dest_file), mode="w+", encoding='utf-8') as f:
+            f.write(content)
+
+    # then open it and work with the local file; every "'" is replaced with "&apos;" before the JSON is parsed
+    with open("%s" % (dest_file), mode="r", encoding='utf-8') as f:
+         return json.loads(f.read().replace("'", "&apos;"))
+
+
+# Return the eduGAIN IdPs parsed from the JSON cached in dest_file; when that file does not exist it is first downloaded from url (requests exceptions, Timeout included, propagate to the caller)
+def getListEccsIdps(url, dest_file):
+    # Download before opening dest_file (a failed request must not leave an empty cache file) and never wait forever
+    if (not pathlib.Path(dest_file).exists()):
+       content = requests.get(url, timeout=30).text
+       with open("%s" % (dest_file), mode="w+", encoding='utf-8') as f:
+            f.write(content)
+
+    # then open it and work with the local file; every "'" is replaced with "&apos;" before the JSON is parsed
+    with open("%s" % (dest_file), mode="r", encoding='utf-8') as f:
+         return json.loads(f.read().replace("'", "&apos;"))
+
+
+# Create the file-backed logger whose output files are consumed by the ECCS-2 API
+def getLogger(filename, path, mode, log_level="DEBUG"):
+    # filename:  log file name, also used as the name of the logging.Logger
+    # path:      directory that contains the log file
+    # mode:      open mode handed to logging.FileHandler (e.g. 'a' to append)
+    # log_level: one of "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"; any other value
+    #            leaves the levels of both the logger and the handler untouched
+    # Returns:   the logging.Logger named after filename. A new FileHandler is attached
+    #            on every call.
+    levels = {
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARN": logging.WARN,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+    }
+    log = logging.getLogger(filename)
+    file_handler = logging.FileHandler("%s/%s" % (path,filename), mode,'utf-8')
+    level = levels.get(log_level)
+    if level is not None:
+        log.setLevel(level)
+        file_handler.setLevel(level)
+    # Only the bare message is written: no timestamp, level or logger name
+    file_handler.setFormatter(logging.Formatter('%(message)s'))
+    log.addHandler(file_handler)
+    return log
+
+
+# Return the first e-mail address of every contact of the given type ('missing email' when there is none)
+def getIdPContacts(idp,contactType):
+    # idp:         IdP dict; idp['contacts'] is iterated and then indexed by contactType
+    # contactType: contact type to extract
+    # Returns:     one entry per contact of that type; empty list when the type is absent
+    emails = []
+    for kind in idp['contacts']:
+        if (kind != contactType):
+           continue
+        for contact in idp['contacts'][contactType]:
+            # A falsy/missing 'emailOrPhone' is treated exactly like a missing 'EmailAddress'
+            addresses = (contact.get('emailOrPhone') or {}).get('EmailAddress')
+            emails.append(addresses[0] if addresses else 'missing email')
+    return emails
+
+
+def getDriver(fqdn_idp=None,debugSelenium=False):
+    # Return a headless Chrome WebDriver with the page-load and script timeouts from eccs2properties applied.
+    requests.packages.urllib3.disable_warnings()
+    # (the call above disables the SSL requests warning messages)
+    # Configure Web-driver: headless, no sandbox, no /dev/shm usage, certificate errors ignored
+    chrome_options = webdriver.ChromeOptions()
+    chrome_options.add_argument('--headless')
+    chrome_options.add_argument('--no-sandbox')
+    chrome_options.add_argument('--disable-dev-shm-usage')
+    chrome_options.add_argument('--ignore-certificate-errors')
+    #chrome_options.add_argument('--start-maximized')
+
+    # For DEBUG only (By default ChromeDriver logs only warnings/errors to stderr.) Verbose logging to
+    # ECCS2SELENIUMLOGDIR/<fqdn_idp>.log is enabled only when BOTH debugSelenium and fqdn_idp are truthy.
+    if (debugSelenium and fqdn_idp):
+       driver = webdriver.Chrome('chromedriver', options=chrome_options,  service_args=['--verbose', '--log-path=%s/%s.log' % (ECCS2SELENIUMLOGDIR, fqdn_idp)])
+    else:
+       driver = webdriver.Chrome('chromedriver', options=chrome_options)
+
+    # Configure timeouts; the numeric settings are handed to Selenium formatted as "%d" strings
+    driver.set_page_load_timeout("%d" % ECCS2SELENIUMPAGELOADTIMEOUT)
+    driver.set_script_timeout("%d" % ECCS2SELENIUMSCRIPTTIMEOUT)
+
+    # The caller owns the returned driver -- NOTE(review): presumably it must call driver.quit(); confirm at call sites
+    return driver
-- 
GitLab