Skip to content
Snippets Groups Projects
helpers.py 1.16 KiB
import re

from sqlalchemy import select

from compendium_v2.db import db, model

# Matches URL-like tokens in free text. The single capture group holds one of
# three alternatives, tried in this order:
#   1. an explicit http:// or https:// URL
#   2. a scheme-less address starting with "www."
#   3. a bare host name ending in a 2-4 letter alphabetic suffix
# The adjacent raw-string pieces below concatenate into one pattern.
URL_PATTERN = re.compile(
    r'\b('
    r'https?://[^\s<>";,(){}\[\]!\\]+'
    r'|www\.[^\s<>";,(){}\[\]!\\]+'
    r'|[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}'
    r')\b(?=\s|\b|[,!?.;:\\])'
)


def get_uppercase_nren_dict():
    """
    Build a lookup of every NREN entity in the database by upper-cased name.

    On top of the names stored in the database, a fixed set of alternative
    names used in the source data is registered; each alias points at the
    same entity object as its canonical name.

    :return: a dictionary of all known NRENs db entities keyed on the uppercased name
        (plus the aliases listed below)
    :raises KeyError: if the canonical NREN behind an alias is not in the database
    """
    # (alias used in the source data, upper-cased canonical NREN name)
    source_data_aliases = (
        ('ASNET', 'ASNET-AM'),
        ('KIFU (NIIF)', 'KIFU'),
        ('SURFNET', 'SURF'),
        ('UOM/RICERKANET', 'UNIVERSITY OF MALTA'),
        ('UOM', 'UNIVERSITY OF MALTA'),
        ('UNINETT', 'SIKT'),
        ('LANET', 'LAT'),
        ('ANA', 'RASH'),
        ('AZSCIENCENET', 'ANAS'),
        ('GRNET S.A.', 'GRNET'),
        ('FUNET', 'CSC'),
    )

    lookup = {}
    for entity in db.session.scalars(select(model.NREN)):
        lookup[entity.name.upper()] = entity

    # Aliases are resolved one at a time, in table order; an alias overwrites
    # any database entry that happens to share its name.
    for alias, canonical in source_data_aliases:
        lookup[alias] = lookup[canonical]

    return lookup


def extract_urls(text: str) -> list[str]:
    """
    Find all URL-like substrings in the given text.

    :param text: the text to scan
    :return: the non-overlapping matches of URL_PATTERN, in the order they
        appear in the text; an empty list if there are none
    """
    matches = URL_PATTERN.findall(text)
    return matches