Skip to content
Snippets Groups Projects
entities_security_contacts.py 2.81 KiB
#!/usr/bin/env python3

# This script will download the current edugain metadata aggregate XML
# and parse it to derive a list of contacts in CSV format.
# This list will be printed to stdout.

import requests
from xml.etree import ElementTree as ET


def strip_start(s, start):
    """Return ``s`` with one leading ``start`` removed.

    If ``s`` does not begin with ``start`` it is returned unchanged. Only a
    single occurrence of the prefix is removed.
    """
    return s[len(start):] if s.startswith(start) else s


# Fetch the eduGAIN metadata aggregate. The timeout keeps the script from
# hanging forever on a stalled connection, and raise_for_status() makes an
# HTTP error fail loudly instead of handing an error page to the XML parser.
xml_req = requests.get('https://mds.edugain.org/edugain-v1.xml', timeout=60)
xml_req.raise_for_status()
root = ET.fromstring(xml_req.content)

# Formatted CSV rows; a set so that identical rows collapse.
contacts = set()
# (scope, contact) pairs already emitted, tracked across all entities.
seen_doms_mails = set()

# Prefix -> namespace URI map passed to the find()/findall() calls.
ns = {
    'md': 'urn:oasis:names:tc:SAML:2.0:metadata',
    'mdui': 'urn:oasis:names:tc:SAML:metadata:ui',
    'shibmd': 'urn:mace:shibboleth:metadata:1.0',
    'remd': 'http://refeds.org/metadata',
    'icmd': 'http://id.incommon.org/metadata',
    'mdrpi': 'urn:oasis:names:tc:SAML:metadata:rpi',
}


# EntityDescriptor elements that are direct children of the root element.
entities = root.findall('./md:EntityDescriptor', ns)

def _stripped_text(element):
    """Return the whitespace-stripped text of ``element``.

    Returns '' when ``element`` is None or has no text, so callers can use a
    plain truthiness check instead of guarding against None twice.
    """
    if element is None or element.text is None:
        return ''
    return element.text.strip()


# Walk every entity and collect one CSV row per (scope, security contact).
for entity in entities:
    entity_id = entity.attrib['entityID'].strip()

    # Entities without registration info are skipped entirely.
    registration_info = entity.find('./md:Extensions/mdrpi:RegistrationInfo', ns)
    if registration_info is None:
        continue
    registration_authority = registration_info.attrib['registrationAuthority'].strip()

    # A missing or empty display name skips the entity instead of aborting
    # the whole run with an AttributeError.
    orgname = _stripped_text(entity.find('./md:Organization/md:OrganizationDisplayName', ns))
    if not orgname:
        continue

    # Security contacts can be marked with either the REFEDS or the InCommon
    # contactType attribute; both are collected.
    sec_contact_els = (
        entity.findall('./md:ContactPerson[@remd:contactType="http://refeds.org/metadata/contactType/security"]', ns)
        + entity.findall('./md:ContactPerson[@icmd:contactType="http://id.incommon.org/metadata/contactType/security"]', ns)
    )

    sec_mails = set()
    for sec_contact_el in sec_contact_els:
        # Strip whitespace before and after removing the prefix so that
        # padded values such as " mailto:x@y " are still normalized.
        raw_mail = _stripped_text(sec_contact_el.find('./md:EmailAddress', ns))
        mail = strip_start(raw_mail, 'mailto:').strip()
        if not mail:
            continue
        name = _stripped_text(sec_contact_el.find('./md:GivenName', ns))
        surname = _stripped_text(sec_contact_el.find('./md:SurName', ns))
        # The surname is only used together with a given name.
        if name and surname:
            sec_mails.add('"{} {}" <{}>'.format(name, surname, mail))
        elif name:
            sec_mails.add('"{}" <{}>'.format(name, mail))
        else:
            sec_mails.add(mail)

    # Only literal (non-regexp) scopes of the IdP role are used; empty Scope
    # elements are ignored.
    scope_els = entity.findall('./md:IDPSSODescriptor/md:Extensions/shibmd:Scope[@regexp="false"]', ns)
    doms_set = {scope for scope in map(_stripped_text, scope_els) if scope}

    for domain in doms_set:
        for mail in sec_mails:
            # Each (scope, contact) pair is emitted once, for the first
            # entity that provides it.
            if (domain, mail) not in seen_doms_mails:
                seen_doms_mails.add((domain, mail))
                # NOTE(review): fields are joined with bare commas and not
                # quoted, so a comma inside a value (e.g. the organization
                # name) yields extra columns. Left as is to keep the output
                # format unchanged for existing consumers.
                contacts.add('{},{},{},{},{}'.format(registration_authority, entity_id, domain, mail, orgname))

# Print the CSV header exactly once, followed by the rows in sorted order so
# the output is stable between runs. The header is printed even when there
# are no rows.
print('RegistrationAuthority,entityID,scope,security-contact,OrganizationName')
for contact in sorted(contacts):
    print(contact)