eduGAIN Connectivity Check / Commits

Commit 11d4e71c
authored 4 years ago by Marco Malavolti
Added IdP exclusion with robots.txt

parent 226eb373
Showing 3 changed files with 44 additions and 19 deletions:

  README.md            +6   −1
  eccs2.py             +35  −18
  eccs2properties.py   +3   −0
README.md  +6 −1

@@ -52,7 +52,12 @@ There are some situations where the check cannot work reliably. In those cases i
 # Disable Checks
-In cases where an IdP cannot be reliably checked, it is necessary to create, also empty, `eccs-disabled.txt` file on IdP's web root.
+In cases where an IdP cannot be reliably checked, it is necessary to create or enrich the `robots.txt` file on the IdP's web root with:
+
+```bash
+User-agent: ECCS
+Disallow: /
+```
 
 # On-line interface
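The exclusion is detected with a fairly strict regular expression in eccs2.py, so it can help to verify a candidate robots.txt locally before deploying it. A minimal sketch using that pattern (the sample content below is illustrative, not taken from any real IdP):

```python
import re

# The same pattern eccs2.py compiles to detect the ECCS exclusion rule.
ECCS_RULE = re.compile(r'^User-agent:\sECCS\sDisallow:\s\/\s*$', re.MULTILINE)

sample_robots_txt = "User-agent: ECCS\nDisallow: /\n"

# True means ECCS would mark this IdP as DISABLED and skip the check.
print(bool(ECCS_RULE.search(sample_robots_txt)))
```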
eccs2.py  +35 −18

@@ -6,7 +6,7 @@ import json
 import re
 import requests
-from eccs2properties import DAY, ECCS2HTMLDIR, ECCS2OUTPUTDIR, ECCS2RESULTSLOG, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SPS, ECCS2SELENIUMDEBUG
+from eccs2properties import DAY, ECCS2HTMLDIR, ECCS2OUTPUTDIR, ECCS2RESULTSLOG, FEDS_BLACKLIST, IDPS_BLACKLIST, ECCS2SPS, ECCS2SELENIUMDEBUG, ROBOTS_USER_AGENT
 from pathlib import Path
 from selenium.common.exceptions import TimeoutException
 from urllib3.util import parse_url

@@ -26,6 +26,15 @@ def getIDPfqdn(entityIDidp):
    else:
       return entityIDidp.split(":")[-1]
 
+# Return True if the ECCS check MUST not be run
+def checkRobots(url_robots_txt):
+   robots_txt = requests.get(url_robots_txt)
+   p = re.compile('^User-agent:\sECCS\sDisallow:\s\/\s*$', re.MULTILINE)
+   m = p.search(robots_txt.text)
+   if (m):
+      return True
+   else:
+      return False
+
 # The function check that the IdP recognized the SP by presenting its Login page.
 # If the IdP Login page contains "username" and "password" fields, than the test is passed.

@@ -51,36 +60,44 @@ def checkIdP(sp,idp,test):
    fqdn_sp = parse_url(sp)[2]
    wayfless_url = sp + idp['entityID']
-   exclude_idp = ""
+   robots = ""
    try:
-      headers = {'User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
-      exclude_idp = requests.get("https://%s/eccs-disabled.txt" % fqdn_idp, headers=headers, verify=False, timeout=30)
-      if (exclude_idp == ""):
-         exclude_idp = requests.get("http://%s/eccs-disabled.txt" % fqdn_idp, headers=headers, verify=False, timeout=30)
-   except requests.exceptions.ConnectionError as e:
-      print("!!! ECCS-DISABLED REQUESTS CONNECTION ERROR EXCEPTION !!!")
-      #print (e.__str__())
-      exclude_idp = ""
-   except requests.exceptions.Timeout as e:
-      print("!!! ECCS-DISABLED REQUESTS TIMEOUT EXCEPTION !!!")
-      #print (e.__str__())
-      exclude_idp = ""
-   if (exclude_idp):
+      headers = {'User-Agent':'%s' % ROBOTS_USER_AGENT}
+      robots = requests.get("https://%s/robots.txt" % fqdn_idp, headers=headers, verify=True, timeout=30)
+      if (robots == ""):
+         robots = requests.get("http://%s/robots.txt" % fqdn_idp, headers=headers, verify=True, timeout=30)
+   except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.SSLError) as e:
       check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
       if (test is not True):
          with open("%s/%s/%s---%s.html" % (ECCS2HTMLDIR, DAY, fqdn_idp, fqdn_sp), "w") as html:
-            html.write("IdP excluded from check by eccs-disabled.txt")
+            html.write("IdP excluded from check because the download of 'robots.txt' failed: %s" % e.__str__())
       else:
-         print("IdP excluded from check by eccs-disabled.txt")
+         print("IdP excluded from check because the download of 'robots.txt' failed: %s" % e.__str__())
       return (idp['entityID'], wayfless_url, check_time, "NULL", "DISABLED")
 
+   if (robots):
+      check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
+      p = re.compile('^User-agent:\sECCS\sDisallow:\s\/\s*$', re.MULTILINE)
+      m = p.search(robots.text)
+      if (m):
+         if (test is not True):
+            with open("%s/%s/%s---%s.html" % (ECCS2HTMLDIR, DAY, fqdn_idp, fqdn_sp), "w") as html:
+               html.write("IdP excluded from check by robots.txt")
+         else:
+            print("IdP excluded from check by robots.txt")
+         return (idp['entityID'], wayfless_url, check_time, "NULL", "DISABLED")
+
    if (idp['registrationAuthority'] in federation_blacklist):
       check_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
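Condensed into a standalone sketch, the new exclusion flow in checkIdP() looks roughly like this. The hostname is a placeholder, the HTTP fallback is omitted, and treating a failed download as an exclusion mirrors the DISABLED result returned above; this is an illustration, not the project's API:

```python
import re
import requests

ROBOTS_USER_AGENT = "ECCS/2.0 (+https://dev-mm.aai-test.garr.it/eccs2)"

def idp_disabled_by_robots(fqdn_idp):
    """Hypothetical helper condensing the robots.txt logic added to checkIdP()."""
    headers = {'User-Agent': ROBOTS_USER_AGENT}
    try:
        # verify=True means certificate problems now surface as SSLError,
        # which is why the commit catches it alongside ConnectionError and Timeout.
        robots = requests.get("https://%s/robots.txt" % fqdn_idp,
                              headers=headers, verify=True, timeout=30)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.SSLError):
        return True  # eccs2.py also marks the IdP as DISABLED in this case
    pattern = re.compile(r'^User-agent:\sECCS\sDisallow:\s\/\s*$', re.MULTILINE)
    return bool(pattern.search(robots.text))

# 'idp.example.org' is a placeholder host, not one taken from the repository.
print(idp_disabled_by_robots("idp.example.org"))
```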
eccs2properties.py  +3 −0

@@ -39,6 +39,9 @@ ECCS2NUMPROCESSES = 25
 # The 2 SPs that will be used to test each IdP
 ECCS2SPS = [ "https://sp24-test.garr.it/Shibboleth.sso/Login?entityID=", "https://attribute-viewer.aai.switch.ch/Shibboleth.sso/Login?entityID=" ]
 
+# ROBOTS.TXT
+ROBOTS_USER_AGENT = "ECCS/2.0 (+https://dev-mm.aai-test.garr.it/eccs2)"
+
 # Registration Authority of Federations to exclude from the check
 FEDS_BLACKLIST = [
    'http://www.surfconext.nl/',
    ...
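For context, each entry in ECCS2SPS is a prefix to which an IdP's entityID is appended (wayfless_url = sp + idp['entityID'] in eccs2.py) to build the WAYFless URL that the check visits. A small illustration with a made-up entityID:

```python
# The two SP prefixes defined in eccs2properties.py.
ECCS2SPS = [
    "https://sp24-test.garr.it/Shibboleth.sso/Login?entityID=",
    "https://attribute-viewer.aai.switch.ch/Shibboleth.sso/Login?entityID=",
]

# 'https://idp.example.org/idp/shibboleth' is a hypothetical entityID.
entity_id = "https://idp.example.org/idp/shibboleth"

for sp in ECCS2SPS:
    print(sp + entity_id)
```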