First commit

9bdc0095 · Davide Vaghetti · bdf40c67 · 9bdc0095 · 9bdc0095
Commit 9bdc0095 authored 4 years ago by Davide Vaghetti
--- a/README.md
+++ b/README.md
+# eduGAIN Contacts
+
+This script downloads the current eduGAIN metadata aggregate XML and parses it
+to derive a list of security contacts, one per domain, in CSV format. The list
+is printed to stdout.
--- a/edugain_contacts.py
+++ b/edugain_contacts.py
+#!/usr/bin/env python3
+
+# This script downloads the current eduGAIN metadata aggregate XML
+# and parses it to derive a list of security contacts in CSV format.
+# The list is printed to stdout.
+
+import requests
+from xml.etree import ElementTree as ET
+from urllib.parse import urlparse
+
+
+def strip_start(s, start):
+    """Return ``s`` with one leading occurrence of ``start`` removed.
+
+    If ``s`` does not begin with ``start`` it is returned unchanged. Only the
+    first occurrence is dropped, so a repeated prefix keeps its later copies.
+    """
+    prefix_len = len(start) if s.startswith(start) else 0
+    return s[prefix_len:]
+
+
+# Fetch the eduGAIN aggregate. The timeout keeps the script from hanging
+# forever on a stalled connection, and raise_for_status() aborts on an HTTP
+# error instead of handing an error page to the XML parser.
+xml_req = requests.get('https://mds.edugain.org/edugain-v1.xml', timeout=60)
+xml_req.raise_for_status()
+# Parse the raw response bytes into an element tree rooted at `root`.
+root = ET.fromstring(xml_req.content)
+
+# Domains that already have an output row, and the set of output rows itself.
+seen_doms = set()
+contacts = set()
+
+# Prefix -> namespace URI map passed to the find()/findall() queries.
+ns = dict(
+    md='urn:oasis:names:tc:SAML:2.0:metadata',
+    mdui='urn:oasis:names:tc:SAML:metadata:ui',
+    shibmd='urn:mace:shibboleth:metadata:1.0',
+    remd='http://refeds.org/metadata',
+    icmd='http://id.incommon.org/metadata',
+)
+
+
+def _text(element):
+    # Whitespace-stripped text of an element; '' when the element is missing
+    # (find() returned None) or has no text content (.text is None).
+    if element is None or element.text is None:
+        return ''
+    return element.text.strip()
+
+
+# EntityDescriptor elements that are direct children of the document root.
+entities = root.findall('./md:EntityDescriptor', ns)
+
+for entity in entities:
+    # Skip entities without a usable organization display name. The element
+    # may be absent or empty, so it is read through _text() instead of
+    # dereferencing .text directly.
+    orgname = _text(entity.find('./md:Organization/md:OrganizationDisplayName', ns))
+    if not orgname:
+        continue
+
+    # Look for a REFEDS security contact first, then an InCommon one.
+    contact = entity.find('./md:ContactPerson[@remd:contactType="http://refeds.org/metadata/contactType/security"]', ns)
+    if contact is None:
+        contact = entity.find(
+            './md:ContactPerson[@icmd:contactType="http://id.incommon.org/metadata/contactType/security"]', ns)
+    if contact is None:
+        continue
+
+    # Strip surrounding whitespace before and after removing the 'mailto:'
+    # scheme so stray spaces or newlines never end up in an output row.
+    mail = strip_start(_text(contact.find('./md:EmailAddress', ns)), 'mailto:').strip()
+    if not mail:
+        continue
+
+    # The surname is only used together with a given name; a missing or empty
+    # given name falls back to the bare address.
+    name = _text(contact.find('./md:GivenName', ns))
+    surname = _text(contact.find('./md:SurName', ns))
+    if name and surname:
+        contact_txt = '"{} {}" <{}>'.format(name, surname, mail)
+    elif name:
+        contact_txt = '"{}" <{}>'.format(name, mail)
+    else:
+        contact_txt = mail
+
+    # Domains come from the IdP's DomainHint elements and non-regexp scopes.
+    dom_els = entity.findall('./md:IDPSSODescriptor/md:Extensions/mdui:DiscoHints/mdui:DomainHint', ns) + \
+        entity.findall('./md:IDPSSODescriptor/md:Extensions/shibmd:Scope[@regexp="false"]', ns)
+    doms = {strip_start(_text(dom), 'www.') for dom in dom_els}
+    doms.discard('')  # empty hint/scope elements carry no domain
+    if not doms:
+        # Fall back to the host part of the organization URL, without any
+        # leading 'www.' or trailing ':port'.
+        orgurl = _text(entity.find('./md:Organization/md:OrganizationURL', ns))
+        baseurl = strip_start(urlparse(orgurl).netloc, 'www.')
+        if ':' in baseurl:
+            baseurl = baseurl.split(':')[0]
+        if not baseurl:
+            # No URL, or a URL with no network location: nothing to key on.
+            continue
+        doms = {baseurl}
+
+    # Only the first entity seen for a domain contributes a row.
+    for domain in doms:
+        if domain not in seen_doms:
+            seen_doms.add(domain)
+            contacts.add('{},{},{}'.format(domain, contact_txt, orgname))
+
+# NOTE(review): fields are joined with bare commas and no CSV quoting, so an
+# organization or contact name containing a comma yields extra columns. The
+# format is kept unchanged here for existing consumers.
+for row in sorted(contacts):
+    print(row)