Skip to content
Snippets Groups Projects

Updated edugain_contacts.py

Closed Romain Wartel requested to merge Romain.Wartel/edugain-contacts:patch-1 into master
1 file
+ 36
8
Compare changes
  • Side-by-side
  • Inline
+ 36
8
@@ -2,25 +2,49 @@
@@ -2,25 +2,49 @@
# This script will download the current edugain metadata aggregate XML
# This script will download the current edugain metadata aggregate XML
# and parse it to derive a list of contacts in CSV format.
# and parse it to derive a list of contacts in CSV format.
# This list will be printed to stdout.
# This list will be written to <contacts_dir>/edugain.csv.
 
# It first reads all files in the contacts directory to see which contacts
 
# already exist, as they will be of better quality than the ones in the XML.
import requests
import requests
 
import sys
 
import glob
 
import os.path
from xml.etree import ElementTree as ET
from xml.etree import ElementTree as ET
from urllib.parse import urlparse
from urllib.parse import urlparse
def strip_start(s, start):
if len(sys.argv) != 2:
if s.startswith(start):
print('Please run as: {} <contacts_dir>'.format(sys.argv[0]))
return s[len(start):]
sys.exit(1)
return s
 
print('Downloading XML from edugain...')
xml_req = requests.get('https://mds.edugain.org/edugain-v1.xml')
xml_req = requests.get('https://mds.edugain.org/edugain-v1.xml')
root = ET.fromstring(xml_req.content)
root = ET.fromstring(xml_req.content)
contacts = set()
contacts = set()
seen_doms = set()
seen_doms = set()
 
# Directory holding the existing contact CSVs; given as the only
# command-line argument.
contacts_dir = sys.argv[1]

# Collect the first column (the domain) of every other contact file so that
# those domains are treated as already seen.
for cf in glob.glob(os.path.join(contacts_dir, '*')):
    # Skip this script's own output file, and anything that is not a regular
    # file (open() would raise on a sub-directory).
    if os.path.basename(cf) == 'edugain.csv' or not os.path.isfile(cf):
        continue
    # Only the domain column is used, so undecodable bytes elsewhere on a
    # line are replaced instead of aborting the whole run.
    with open(cf, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            # str.split() always returns at least one element, so test the
            # stripped domain itself: blank lines and trailing newlines must
            # not end up in seen_doms or inflate the count printed below.
            domain = line.split(',')[0].strip()
            if domain:
                seen_doms.add(domain)

print('Excluding {} domains that are in other contact CSVs'.format(len(seen_doms)))
 
 
 
def strip_start(s, start):
    """Return ``s`` with the leading ``start`` removed, if present.

    At most one occurrence is removed. When ``s`` does not begin with
    ``start`` it is returned unchanged.
    """
    if s.startswith(start):
        return s[len(start):]
    return s
 
 
ns = {
ns = {
'md': 'urn:oasis:names:tc:SAML:2.0:metadata',
'md': 'urn:oasis:names:tc:SAML:2.0:metadata',
'mdui': 'urn:oasis:names:tc:SAML:metadata:ui',
'mdui': 'urn:oasis:names:tc:SAML:metadata:ui',
@@ -64,5 +88,9 @@ for entity in entities:
@@ -64,5 +88,9 @@ for entity in entities:
seen_doms.add(domain_text)
seen_doms.add(domain_text)
contacts.add('{},{},{}'.format(domain_text, contact_txt, orgname))
contacts.add('{},{},{}'.format(domain_text, contact_txt, orgname))
for contact in sorted(contacts):
outpath = os.path.join(contacts_dir, 'edugain.csv')
print(contact)
print('Writing {} contacts to {}'.format(len(contacts), outpath))
 
# Write with an explicit encoding so the output does not depend on the
# locale; contact lines may contain non-ASCII text (e.g. in the name column).
with open(outpath, 'w', encoding='utf-8') as fout:
    # Header row followed by an empty line, exactly as before.
    fout.write('domain,contact,name\n\n')
    # One contact per line, in sorted order for a stable file.
    fout.writelines(contact + '\n' for contact in sorted(contacts))
 
\ No newline at end of file
Loading