# helpers.py
import re
from typing import List
from sqlalchemy import select
from compendium_v2.db import db, presentation_models
# Heuristic matcher for URL-like substrings embedded in free text.
# The pattern contains exactly one capturing group (wrapping the three
# alternatives below), so findall() yields that group's text for each match.
# A match must both start and end on a word boundary (\b).
URL_PATTERN = re.compile(
    # 1) explicit scheme: "http://" or "https://" followed by one or more
    #    characters that are not whitespace or any of < > " ; , ( ) { } [ ] ! \
    (r'\b(https?://[^\s<>";,(){}\[\]!\\]+'
     # 2) no scheme, but a literal "www." prefix followed by the same set of
     #    permitted characters
     r'|www\.[^\s<>";,(){}\[\]!\\]+'
     # 3) bare domain-like token: ASCII letters, digits, dots and hyphens,
     #    then a dot and a suffix of 2 to 4 ASCII letters
     r'|[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})'
     # closing word boundary, then a lookahead for whitespace, a word boundary,
     # or one of , ! ? . ; : \
     # NOTE(review): the lookahead's own \b alternative is always true right
     # after the preceding \b, so the lookahead does not appear to restrict
     # matches any further - confirm whether that was intended.
     r'\b(?=\s|\b|[,!?.;:\\])')
)
def get_uppercase_nren_dict():
    """
    Build a lookup of every NREN db entity, keyed on its uppercased name.

    Alternative spellings that are used in the source data are registered as
    additional keys that point at the same entity as the canonical name.

    :return: a dictionary mapping uppercased NREN names (and the aliases
        listed below) to NREN db entities
    :raises KeyError: if the NREN an alias refers to was not loaded from the db
    """
    # (alias as it appears in the source data, uppercased name it stands for)
    source_data_aliases = (
        ('ASNET', 'ASNET-AM'),
        ('KIFU (NIIF)', 'KIFU'),
        ('KIFÜ', 'KIFU'),
        ('NIIF/HUNGARNET', 'KIFU'),
        ('SURFNET', 'SURF'),
        ('UOM/RICERKANET', 'UNIVERSITY OF MALTA'),
        ('UOM', 'UNIVERSITY OF MALTA'),
        ('UNINETT', 'SIKT'),
        ('LANET', 'LAT'),
        ('ANA', 'RASH'),
        ('AZSCIENCENET', 'ANAS'),
        ('GRNET S.A.', 'GRNET'),
        ('FUNET', 'CSC'),
    )

    # index every NREN currently in the database by its uppercased name
    nrens_by_name = {}
    for entity in db.session.scalars(select(presentation_models.NREN)):
        nrens_by_name[entity.name.upper()] = entity

    # each alias resolves to the very same entity object as its canonical name
    for alias, canonical_name in source_data_aliases:
        nrens_by_name[alias] = nrens_by_name[canonical_name]

    return nrens_by_name
def extract_urls(text: str) -> List[str]:
    """
    Find every URL-like substring of the given text.

    :param text: the string to scan
    :return: the substrings matched by URL_PATTERN, in order of appearance
        (an empty list when nothing matches)
    """
    # the compiled pattern's own findall gives the same result as re.findall
    matches = URL_PATTERN.findall(text)
    return matches