#!/usr/bin/env python3
"""eduGAIN Contacts.

This script will download the current eduGAIN metadata aggregate XML and
parse it to derive a list of contacts in CSV format.  The list will be
printed to stdout.

Each output row is ``domain,contact,organization``.  Rows are written with
the ``csv`` module so that organization names containing commas and the
quoted display name of a contact stay inside their own column.
"""

import csv
import sys
from urllib.parse import urlparse
from xml.etree import ElementTree as ET

METADATA_URL = 'https://mds.edugain.org/edugain-v1.xml'
# Seconds to wait for the metadata server before giving up.
REQUEST_TIMEOUT = 60

ns = {
    'md': 'urn:oasis:names:tc:SAML:2.0:metadata',
    'mdui': 'urn:oasis:names:tc:SAML:metadata:ui',
    'shibmd': 'urn:mace:shibboleth:metadata:1.0',
    'remd': 'http://refeds.org/metadata',
    'icmd': 'http://id.incommon.org/metadata',
}

# (qualified attribute name, required value) pairs that mark a ContactPerson
# as a security contact, in order of preference: REFEDS first, then InCommon.
SECURITY_CONTACT_TYPES = (
    ('{%s}contactType' % ns['remd'],
     'http://refeds.org/metadata/contactType/security'),
    ('{%s}contactType' % ns['icmd'],
     'http://id.incommon.org/metadata/contactType/security'),
)


def strip_start(s, start):
    """Return *s* without the prefix *start*; return *s* unchanged if absent."""
    if s.startswith(start):
        return s[len(start):]
    return s


def _text(element):
    """Return the stripped text of *element*, or '' if it is missing or empty."""
    if element is None or element.text is None:
        return ''
    return element.text.strip()


def _normalize_domain(value):
    """Lower-case *value* and drop a leading ``www.`` so duplicates compare equal."""
    return strip_start(value.strip().lower(), 'www.')


def _format_contact(person):
    """Format a ContactPerson element as ``"Given Sur" <mail>`` or plain mail.

    Returns None when the element carries no usable e-mail address.
    """
    mail = strip_start(_text(person.find('./md:EmailAddress', ns)), 'mailto:').strip()
    if not mail:
        return None
    given = _text(person.find('./md:GivenName', ns))
    if not given:
        # Without a given name only the bare address is reported.
        return mail
    surname = _text(person.find('./md:SurName', ns))
    display = '{} {}'.format(given, surname) if surname else given
    return '"{}" <{}>'.format(display, mail)


def _security_contact(entity):
    """Return the formatted security contact of *entity*, or None.

    REFEDS-typed contacts are preferred over InCommon-typed ones; contacts
    without an e-mail address are passed over.
    """
    persons = entity.findall('./md:ContactPerson', ns)
    for attribute, value in SECURITY_CONTACT_TYPES:
        for person in persons:
            if person.get(attribute) != value:
                continue
            formatted = _format_contact(person)
            if formatted is not None:
                return formatted
    return None


def _entity_domains(entity):
    """Return the set of domains associated with *entity*.

    Domain hints and non-regexp scopes of the IdP are used when present;
    otherwise the host of the organization URL is the fallback.  The result
    may be empty.
    """
    hints = entity.findall(
        './md:IDPSSODescriptor/md:Extensions/mdui:DiscoHints/mdui:DomainHint', ns)
    # A Scope without a ``regexp`` attribute is a literal scope as well
    # (the attribute is an optional boolean defaulting to false).
    scopes = [
        scope for scope in entity.findall(
            './md:IDPSSODescriptor/md:Extensions/shibmd:Scope', ns)
        if scope.get('regexp', 'false').strip() in ('false', '0')
    ]
    domains = {_normalize_domain(_text(element)) for element in hints + scopes}
    domains.discard('')
    if domains:
        return domains

    org_url = _text(entity.find('./md:Organization/md:OrganizationURL', ns))
    try:
        # ``hostname`` drops userinfo and port and is already lower-cased.
        host = urlparse(org_url).hostname or ''
    except ValueError:
        # Raised by urlparse for malformed URLs (e.g. a broken IPv6 literal).
        host = ''
    host = _normalize_domain(host)
    return {host} if host else set()


def extract_contacts(root):
    """Return sorted ``(domain, contact, organization)`` tuples from *root*.

    *root* is the parsed metadata aggregate whose direct children are the
    ``md:EntityDescriptor`` elements.  Entities without an organization
    display name, a security contact with an e-mail address, or a domain are
    skipped.  Each domain is reported once; the first entity (in document
    order) that provides it wins.
    """
    rows = []
    seen_domains = set()
    for entity in root.findall('./md:EntityDescriptor', ns):
        org_name = _text(
            entity.find('./md:Organization/md:OrganizationDisplayName', ns))
        if not org_name:
            continue
        contact_txt = _security_contact(entity)
        if contact_txt is None:
            continue
        for domain in _entity_domains(entity):
            if domain not in seen_domains:
                seen_domains.add(domain)
                rows.append((domain, contact_txt, org_name))
    return sorted(rows)


def write_contacts(rows, out=None):
    """Write *rows* as CSV to *out* (defaults to stdout), one row per line."""
    stream = sys.stdout if out is None else out
    # '\n' keeps the line endings the script has always produced via print().
    writer = csv.writer(stream, lineterminator='\n')
    writer.writerows(rows)


def fetch_metadata(url=METADATA_URL, timeout=REQUEST_TIMEOUT):
    """Download the metadata aggregate from *url* and return its XML root.

    Raises ``requests.HTTPError`` on a non-success HTTP status instead of
    trying to parse an error page as metadata.
    """
    # Imported here so the parsing helpers stay importable without the
    # third-party dependency.
    import requests

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return ET.fromstring(response.content)


def main():
    """Download the eduGAIN aggregate and print the contact list as CSV."""
    write_contacts(extract_contacts(fetch_metadata()))


if __name__ == '__main__':
    main()