diff --git a/Changelog.md b/Changelog.md index 60ccf9a4670520b7265f8c3a67a3db428192fae6..27d91653552e9d175003b8ad4765a397a0431d4b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [0.85] - 2025-01-28 +- Add back in old data publishers until the exported survey data is manually validated + ## [0.84] - 2025-01-28 - Fix budget data import from excel file to include 2016/2021 - Use the new React compiler for auto-memoized components (as long as they are pure) diff --git a/compendium_v2/publishers/survey_publisher_legacy_excel.py b/compendium_v2/publishers/survey_publisher_legacy_excel.py new file mode 100644 index 0000000000000000000000000000000000000000..c73ef1aff54c14256539e06161df9a0be90cd004 --- /dev/null +++ b/compendium_v2/publishers/survey_publisher_legacy_excel.py @@ -0,0 +1,638 @@ +""" +survey_publisher_v1 +========================= + +This module loads the survey data from before 2022 from a legacy Excel files. +Missing info is filled in from the survey db for some questions. +Registered as click cli command when installing compendium-v2. 
+ +""" +from __future__ import annotations +import itertools +import logging +import math +import click + +from sqlalchemy import select, delete +from collections import defaultdict + +import compendium_v2 +from compendium_v2.config import load +from compendium_v2.db import db, presentation_models +from compendium_v2.environment import setup_logging +from compendium_v2.publishers import helpers, excel_parser +from compendium_v2.survey_db import model as survey_model + +setup_logging() + +logger = logging.getLogger('survey-publisher-legacy-excel') + + +def db_budget_migration(nren_dict): + # move data from Survey DB budget table + data = db.session.scalars(select(survey_model.Nrens)) + db.session.execute(delete(presentation_models.BudgetEntry).where( + presentation_models.BudgetEntry.year < 2022)) + inserts = defaultdict(dict) + for nren in data: + for budget in nren.budgets: + abbrev = nren.abbreviation.upper() + year = budget.year + + if float(budget.budget) > 400: + logger.warning(f'Incorrect Data: {abbrev} has budget set >400M EUR for {year}. ({budget.budget})') + continue + + if float(budget.budget) == 0: + logger.warning(f'Incorrect Data: {abbrev} has budget set to 0 EUR for {year}.') + continue + + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + budget_entry = { + 'nren': nren_dict[abbrev], + 'nren_id': nren_dict[abbrev].id, + 'budget': float(budget.budget), + 'year': year + } + inserts[nren_dict[abbrev].id][year] = budget_entry + + # Import the data from excel sheet to database + exceldata = excel_parser.fetch_budget_excel_data() + + for abbrev, budget, year in exceldata: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + if budget > 400: + logger.warning(f'{nren} has budget set to >400M EUR for {year}. 
({budget})') + continue + + budget_entry = { + 'nren': nren_dict[abbrev], + 'nren_id': nren_dict[abbrev].id, + 'budget': budget, + 'year': year + } + inserts[nren_dict[abbrev].id][year] = budget_entry + all_inserts = list(itertools.chain([i for y in inserts.values() for i in y.values()])) + db.session.bulk_insert_mappings(presentation_models.BudgetEntry, all_inserts) + db.session.commit() + + +def db_funding_migration(nren_dict): + # Import the data to database + data = excel_parser.fetch_funding_excel_data() + db.session.execute(delete(presentation_models.FundingSource).where( + presentation_models.FundingSource.year < 2022)) + inserts = [] + for (abbrev, year, client_institution, + european_funding, + gov_public_bodies, + commercial, other) in data: + + _data = [client_institution, european_funding, gov_public_bodies, commercial, other] + total = sum(_data) + if not math.isclose(total, 100, abs_tol=0.01): + logger.warning(f'{abbrev} funding sources for {year} do not sum to 100% ({total})') + continue + + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + inserts.append({ + 'nren': nren_dict[abbrev], + 'nren_id': nren_dict[abbrev].id, + 'year': year, + 'client_institutions': client_institution, + 'european_funding': european_funding, + 'gov_public_bodies': gov_public_bodies, + 'commercial': commercial, + 'other': other + }) + db.session.bulk_insert_mappings(presentation_models.FundingSource, inserts) + db.session.commit() + + +def db_charging_structure_migration(nren_dict): + # Import the data to database + data = excel_parser.fetch_charging_structure_excel_data() + db.session.execute(delete(presentation_models.ChargingStructure).where( + presentation_models.ChargingStructure.year < 2022)) + + inserts = [] + for (abbrev, year, charging_structure) in data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + inserts.append({ + 'nren': nren_dict[abbrev], + 'nren_id': nren_dict[abbrev].id, + 'year': year, + 'fee_type': charging_structure + }) + db.session.bulk_insert_mappings(presentation_models.ChargingStructure, inserts) + db.session.commit() + + +def db_staffing_migration(nren_dict): + db.session.execute(delete(presentation_models.NrenStaff).where( + presentation_models.NrenStaff.year < 2022)) + staff_data = list(excel_parser.fetch_staffing_excel_data()) + + nren_staff_map = {} + inserts = [] + for (abbrev, year, permanent_fte, subcontracted_fte) in staff_data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping staff data.') + continue + + nren = nren_dict[abbrev] + nren_staff_map[(nren.id, year)] = { + 'nren': nren, + 'nren_id': nren.id, + 'year': year, + 'permanent_fte': permanent_fte, + 'subcontracted_fte': subcontracted_fte, + 'technical_fte': 0, + 'non_technical_fte': 0 + } + + function_data = excel_parser.fetch_staff_function_excel_data() + for (abbrev, year, technical_fte, non_technical_fte) in function_data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping staff function data.') + continue + + nren = nren_dict[abbrev] + if (nren.id, year) in nren_staff_map: + nren_staff_map[(nren.id, year)]['technical_fte'] = technical_fte + nren_staff_map[(nren.id, year)]['non_technical_fte'] = non_technical_fte + else: + nren_staff_map[(nren.id, year)] = { + 'nren': nren, + 'nren_id': nren.id, + 'year': year, + 'permanent_fte': 0, + 'subcontracted_fte': 0, + 'technical_fte': technical_fte, + 'non_technical_fte': non_technical_fte + } + + for nren_staff_model in nren_staff_map.values(): + employed = nren_staff_model['permanent_fte'] + nren_staff_model['subcontracted_fte'] + technical = nren_staff_model['technical_fte'] + nren_staff_model['non_technical_fte'] + if not math.isclose(employed, technical, abs_tol=0.01) and employed != 0 and technical != 0: + logger.warning(f'{nren_staff_model["nren"].name} in {nren_staff_model["year"]}:' + f' FTE do not equal across employed/technical categories ({employed} != {technical})') + del nren_staff_model['nren'] + inserts.append(nren_staff_model) + db.session.bulk_insert_mappings(presentation_models.NrenStaff, inserts) + db.session.commit() + + +def db_ecprojects_migration(nren_dict): + db.session.execute(delete(presentation_models.ECProject).where( + presentation_models.ECProject.year < 2022)) + ecproject_data = excel_parser.fetch_ecproject_excel_data() + inserted = set() + inserts = [] + for (abbrev, year, project) in ecproject_data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + if (nren.id, year, project) in inserted: + logger.warning(f'{nren.name} has duplicate EC project {project} in {year}. 
Skipping.') + continue + inserted.add((nren.id, year, project)) + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'project': project + }) + db.session.bulk_insert_mappings(presentation_models.ECProject, inserts) + db.session.commit() + + +def db_organizations_migration(nren_dict): + db.session.execute(delete(presentation_models.ParentOrganization).where( + presentation_models.ParentOrganization.year < 2022)) + organization_data = excel_parser.fetch_organization_excel_data() + inserts = [] + for (abbrev, year, org) in organization_data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'organization': org + }) + db.session.bulk_insert_mappings(presentation_models.ParentOrganization, inserts) + db.session.commit() + + +def db_traffic_volume_migration(nren_dict): + db.session.execute(delete(presentation_models.TrafficVolume).where( + presentation_models.TrafficVolume.year < 2023)) + traffic_data = excel_parser.fetch_traffic_excel_data() + inserts = [] + for (abbrev, year, from_external, to_external, from_customers, to_customers) in traffic_data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + if nren.name == 'CESNET': + # COMP-447: correct CESNET traffic data for 2019 + if year == 2019: + to_customers = 222766 + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'from_customers': from_customers, + 'to_customers': to_customers, + 'from_external': from_external, + 'to_external': to_external + }) + + db.session.bulk_insert_mappings(presentation_models.TrafficVolume, inserts) + db.session.commit() + + +def db_nren_services_migration(nren_dict): + services = [s for s in db.session.scalars(select(presentation_models.Service))] + + for service_info in excel_parser.fetch_nren_services_excel_data(): + [service] = [s for s in services if s.name_key == service_info['service_name_key']] + + abbrev = service_info['nren_name'] + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + nren_service = presentation_models.NRENService( + nren=nren, + nren_id=nren.id, + year=service_info['year'], + service=service, + service_key=service.name_key, + product_name=service_info['product_name'], + additional_information=service_info['additional_information'], + official_description=service_info['official_description'] + ) + + db.session.merge(nren_service) + + db.session.commit() + + +def db_connected_proportion_migration(nren_dict): + db.session.execute(delete(presentation_models.ConnectedProportion).where( + presentation_models.ConnectedProportion.year < 2023)) + remit = excel_parser.fetch_remit_excel_data() + nr_connected = excel_parser.fetch_nr_connected_excel_data() + market_share = excel_parser.fetch_market_share_excel_data() + users_served = excel_parser.fetch_users_served_excel_data() + + data_by_nren = defaultdict(dict) + + for key in itertools.chain(remit.keys(), nr_connected.keys(), market_share.keys(), users_served.keys()): + (abbrev, year, user_category) = key + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + to_add = (nren.id, year, user_category) + data_by_nren[to_add].update({ + 'nren_id': nren.id, + 'year': year, + 'user_category': user_category, + 'coverage': remit.get(key), + 'number_connected': nr_connected.get(key, data_by_nren[to_add].get('number_connected')), + 'market_share': market_share.get(key, data_by_nren[to_add].get('market_share')), + 'users_served': users_served.get(key, data_by_nren[to_add].get('users_served')) + }) + + inserts = list(data_by_nren.values()) + + db.session.bulk_insert_mappings(presentation_models.ConnectedProportion, inserts) + db.session.commit() + + +def db_connectivity_level_migration(nren_dict): + db.session.execute(delete(presentation_models.ConnectivityLevel).where( + presentation_models.ConnectivityLevel.year < 2023)) + typical_speed = excel_parser.fetch_typical_speed_excel_data() + highest_speed = excel_parser.fetch_highest_speed_excel_data() + highest_speed_proportion = excel_parser.fetch_highest_speed_proportion_excel_data() + data_by_nren = defaultdict(dict) + + for key in itertools.chain(typical_speed.keys(), highest_speed.keys(), highest_speed_proportion.keys()): + (abbrev, year, user_category) = key + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + to_add = (nren.id, year, user_category) + data_by_nren[to_add].update({ + 'nren_id': nren.id, + 'year': year, + 'user_category': user_category, + 'typical_speed': typical_speed.get(key, data_by_nren[to_add].get('typical_speed')), + 'highest_speed': highest_speed.get(key, data_by_nren[to_add].get('highest_speed')), + 'highest_speed_proportion': highest_speed_proportion.get( + key, data_by_nren[to_add].get('highest_speed_proportion')) + }) + + inserts = list(data_by_nren.values()) + + db.session.bulk_insert_mappings(presentation_models.ConnectivityLevel, inserts) + db.session.commit() + + +def db_connection_carrier_migration(nren_dict): + db.session.execute(delete(presentation_models.ConnectionCarrier).where( + presentation_models.ConnectionCarrier.year < 2023)) + carriers = excel_parser.fetch_carriers_excel_data() + inserts = [] + for key, carry_mechanism in carriers.items(): + (abbrev, year, user_category) = key + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'user_category': user_category, + 'carry_mechanism': carry_mechanism + }) + db.session.bulk_insert_mappings(presentation_models.ConnectionCarrier, inserts) + db.session.commit() + + +def db_connectivity_growth_migration(nren_dict): + db.session.execute(delete(presentation_models.ConnectivityGrowth).where( + presentation_models.ConnectivityGrowth.year < 2023)) + growth = excel_parser.fetch_growth_excel_data() + inserts = [] + for key, growth_percent in growth.items(): + (abbrev, year, user_category) = key + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'user_category': user_category, + 'growth': growth_percent + }) + db.session.bulk_insert_mappings(presentation_models.ConnectivityGrowth, inserts) + db.session.commit() + + +def db_connectivity_load_migration(nren_dict): + db.session.execute(delete(presentation_models.ConnectivityLoad).where( + presentation_models.ConnectivityLoad.year < 2023)) + average = excel_parser.fetch_average_traffic_excel_data() + peak = excel_parser.fetch_peak_traffic_excel_data() + + all_entry_keys = set() + all_entry_keys.update(average.keys()) + all_entry_keys.update(peak.keys()) + inserts = [] + for key in all_entry_keys: + (abbrev, year, user_category) = key + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'user_category': user_category, + 'average_load_from_institutions': average.get(key, (None, None))[0], + 'average_load_to_institutions': average.get(key, (None, None))[1], + 'peak_load_from_institutions': peak.get(key, (None, None))[0], + 'peak_load_to_institutions': peak.get(key, (None, None))[1] + }) + db.session.bulk_insert_mappings(presentation_models.ConnectivityLoad, inserts) + db.session.commit() + + +def db_remote_campuses_migration(nren_dict): + db.session.execute(delete(presentation_models.RemoteCampuses).where( + presentation_models.RemoteCampuses.year < 2023)) + campuses = excel_parser.fetch_remote_campuses_excel_data() + inserts = [] + for (abbrev, year, connectivity, country, connected_to_r_e) in campuses: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + connections = [] + if country: + connections.append({'country': country, 'local_r_and_e_connection': connected_to_r_e}) + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'remote_campus_connectivity': connectivity, + 'connections': connections + }) + db.session.bulk_insert_mappings(presentation_models.RemoteCampuses, inserts) + db.session.commit() + + +def db_dark_fibre_lease_migration(nren_dict): + db.session.execute(delete(presentation_models.DarkFibreLease).where( + presentation_models.DarkFibreLease.year < 2023)) + data_rows = excel_parser.fetch_dark_fibre_iru_excel_data() + iru_duration = excel_parser.fetch_iru_duration_excel_data() + inserts = [] + for (abbrev, year, iru, length_in_country, length_out_country) in data_rows: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'iru_or_lease': iru, + 'fibre_length_in_country': length_in_country, + 'fibre_length_outside_country': length_out_country, + 'iru_duration': iru_duration.get((abbrev, year)) + }) + db.session.bulk_insert_mappings(presentation_models.DarkFibreLease, inserts) + db.session.commit() + + +def db_dark_fibre_installed_migration(nren_dict): + db.session.execute(delete(presentation_models.DarkFibreInstalled).where( + presentation_models.DarkFibreInstalled.year < 2023)) + data_rows = excel_parser.fetch_dark_fibre_installed_excel_data() + inserts = [] + for (abbrev, year, installed, length) in data_rows: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'installed': installed, + 'fibre_length_in_country': length + }) + db.session.bulk_insert_mappings(presentation_models.DarkFibreInstalled, inserts) + db.session.commit() + + +def db_passive_monitoring_migration(nren_dict): + db.session.execute(delete(presentation_models.PassiveMonitoring).where( + presentation_models.PassiveMonitoring.year < 2023)) + data_rows = excel_parser.fetch_passive_monitoring_excel_data() + inserts = [] + for (abbrev, year, monitoring, method) in data_rows: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'monitoring': monitoring, + 'method': method + }) + db.session.bulk_insert_mappings(presentation_models.PassiveMonitoring, inserts) + db.session.commit() + + +def db_capacity_migration(nren_dict): + db.session.execute(delete(presentation_models.Capacity).where( + presentation_models.Capacity.year < 2023)) + largest_data_rows = excel_parser.fetch_largest_link_capacity_excel_data() + typical_data_rows = excel_parser.fetch_typical_backbone_capacity_excel_data() + + by_nren = defaultdict(dict) + + for key in itertools.chain(largest_data_rows.keys(), typical_data_rows.keys()): + (abbrev, year) = key + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + to_add = (nren.id, year) + by_nren[to_add].update({ + 'nren_id': nren.id, + 'year': year, + 'largest_link_capacity': largest_data_rows.get(key, by_nren[to_add].get('largest_link_capacity')), + 'typical_backbone_capacity': typical_data_rows.get(key, by_nren[to_add].get('typical_backbone_capacity')) + }) + inserts = list(by_nren.values()) + db.session.bulk_insert_mappings(presentation_models.Capacity, inserts) + db.session.commit() + + +def db_non_r_e_peers_migration(nren_dict): + db.session.execute(delete(presentation_models.NonREPeers).where( + presentation_models.NonREPeers.year < 2023)) + data_rows = excel_parser.fetch_non_r_e_peers_excel_data() + inserts = [] + for (abbrev, year, nr_of_non_r_and_e_peers) in data_rows: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'nr_of_non_r_and_e_peers': nr_of_non_r_and_e_peers + }) + db.session.bulk_insert_mappings(presentation_models.NonREPeers, inserts) + db.session.commit() + + +def db_ops_automation_migration(nren_dict): + db.session.execute(delete(presentation_models.OpsAutomation).where( + presentation_models.OpsAutomation.year < 2023)) + data_rows = excel_parser.fetch_ops_automation_excel_data() + inserts = [] + for (abbrev, year, automation, specifics) in data_rows: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. 
Skipping.') + continue + + nren = nren_dict[abbrev] + inserts.append({ + 'nren_id': nren.id, + 'year': year, + 'ops_automation': automation, + 'ops_automation_specifics': specifics + }) + db.session.bulk_insert_mappings(presentation_models.OpsAutomation, inserts) + db.session.commit() + + +def _cli(app): + with app.app_context(): + nren_dict = helpers.get_uppercase_nren_dict() + db_budget_migration(nren_dict) + db_funding_migration(nren_dict) + db_charging_structure_migration(nren_dict) + db_staffing_migration(nren_dict) + db_ecprojects_migration(nren_dict) + db_organizations_migration(nren_dict) + db_traffic_volume_migration(nren_dict) + db_nren_services_migration(nren_dict) + + db_connected_proportion_migration(nren_dict) + db_connectivity_level_migration(nren_dict) + db_connection_carrier_migration(nren_dict) + db_connectivity_growth_migration(nren_dict) + db_connectivity_load_migration(nren_dict) + db_remote_campuses_migration(nren_dict) + + db_dark_fibre_lease_migration(nren_dict) + db_dark_fibre_installed_migration(nren_dict) + db_passive_monitoring_migration(nren_dict) + db_capacity_migration(nren_dict) + db_non_r_e_peers_migration(nren_dict) + db_ops_automation_migration(nren_dict) + + +@click.command() +@click.option('--config', type=click.STRING, default='config.json') +def cli(config): + app_config = load(open(config, 'r')) + + app_config['SQLALCHEMY_BINDS'] = {survey_model.SURVEY_DB_BIND: app_config['SURVEY_DATABASE_URI']} + + app = compendium_v2._create_app_with_db(app_config) + print("survey-publisher-v1 starting") + _cli(app) + + +if __name__ == "__main__": + cli() diff --git a/compendium_v2/publishers/survey_publisher_old_db_2022.py b/compendium_v2/publishers/survey_publisher_old_db_2022.py new file mode 100644 index 0000000000000000000000000000000000000000..f14cb4672146d22570efd34539e7b4cdddbbf37a --- /dev/null +++ b/compendium_v2/publishers/survey_publisher_old_db_2022.py @@ -0,0 +1,1568 @@ +""" +survey_publisher_old_db_2022 
+============================ + +This module loads the survey data from 2022 from the old survey database into presentation_models. +Registered as click cli command when installing compendium-v2. + +""" +from decimal import Decimal +import logging +import click +import enum +import math +import json +import html +import itertools + +from sqlalchemy import delete, text +from collections import defaultdict + +import compendium_v2 +from compendium_v2.conversion.mapping import CHARGING_LEVELS, CONNECTION, INTERCONNECTION, SERVICE_USER_TYPE_TO_CODE +from compendium_v2.db.presentation_model_enums import CommercialCharges, CommercialConnectivityCoverage, \ + ConnectionMethod, FeeType, ServiceCategory, UserCategory, YesNoPlanned +from compendium_v2.environment import setup_logging +from compendium_v2.config import load +from compendium_v2.publishers.helpers import extract_urls, valid_url +from compendium_v2.survey_db import model as survey_model +from compendium_v2.db import db, presentation_models +from compendium_v2.publishers import helpers +from compendium_v2.conversion import mapping + +setup_logging() + +logger = logging.getLogger('survey-publisher-old-db-2022') + +BUDGET_QUERY = """ +SELECT DISTINCT ON (n.id, a.question_id) + n.abbreviation AS nren, + a.value AS budget +FROM answers a +JOIN nrens n ON a.nren_id = n.id +JOIN questions q ON a.question_id = q.id +JOIN sections s ON q.section_id = s.id +JOIN compendia c ON s.compendium_id = c.id +WHERE + a.question_id = 16402 +AND c.year = 2022 +ORDER BY n.id, a.question_id, a.updated_at DESC +""" + +QUESTION_TEMPLATE_QUERY = """ +SELECT DISTINCT ON (n.id, a.question_id) + n.abbreviation AS nren, + a.value AS value +FROM answers a +JOIN nrens n ON a.nren_id = n.id +JOIN questions q ON a.question_id = q.id +JOIN sections s ON q.section_id = s.id +JOIN compendia c ON s.compendium_id = c.id +WHERE + a.question_id = {} + AND c.year = {} + AND a.value NOT IN ('"NA"', '"N/A"', '[""]', '["-"]', '["/"]') +ORDER BY n.id, 
a.question_id, a.updated_at DESC +""" + +RECURSIVE_QUERY = """ + WITH RECURSIVE parent_questions AS ( + -- Base case + SELECT q.id, q.equivalent_question_id, c.year, q.title + FROM questions q + JOIN sections s ON q.section_id = s.id + JOIN compendia c ON s.compendium_id = c.id + WHERE q.id = {} + UNION ALL + -- Recursive case + SELECT q.id, q.equivalent_question_id, c.year, q.title + FROM questions q + INNER JOIN parent_questions pq ON q.id = pq.equivalent_question_id + JOIN sections s ON q.section_id = s.id + JOIN compendia c ON s.compendium_id = c.id) + SELECT DISTINCT ON (n.id, answers.question_id) answers.id, + UPPER(n.abbreviation) AS nren, + parent_questions.year, + answers.value as answer + FROM answers + JOIN parent_questions ON answers.question_id = parent_questions.id + JOIN nrens n on answers.nren_id = n.id + WHERE UPPER(answers.value) NOT IN ('"NA"', '"N/A"', '[""]', '["-"]', '["/"]', '/', '["NA"]', '""', '[]', '[n/a]') + ORDER BY n.id, answers.question_id, answers.updated_at DESC; +""" + + +class FundingSource(enum.Enum): + CLIENT_INSTITUTIONS = 16405 + EUROPEAN_FUNDING = 16406 + COMMERCIAL = 16407 + OTHER = 16408 + GOV_PUBLIC_BODIES = 16409 + + +class StaffQuestion(enum.Enum): + """ + Answers are numbers expressed in FTEs (full time equivalents) + """ + PERMANENT_FTE = 16414 + SUBCONTRACTED_FTE = 16413 + TECHNICAL_FTE = 16416 + NON_TECHNICAL_FTE = 16417 + + +class OrgQuestion(enum.Enum): + """ + Answers are strings + """ + PARENT_ORG_NAME = 16419 + + SUB_ORGS_1_NAME = 16422 + SUB_ORGS_1_CHOICE = 16449 + SUB_ORGS_1_ROLE = 16426 + + SUB_ORGS_2_NAME = 16429 + SUB_ORGS_2_CHOICE = 16448 + SUB_ORGS_2_ROLE = 16434 + + SUB_ORGS_3_NAME = 16430 + SUB_ORGS_3_CHOICE = 16446 + SUB_ORGS_3_ROLE = 16435 + + SUB_ORGS_4_NAME = 16432 + SUB_ORGS_4_CHOICE = 16451 + SUB_ORGS_4_ROLE = 16438 + + SUB_ORGS_5_NAME = 16433 + SUB_ORGS_5_CHOICE = 16450 + SUB_ORGS_5_ROLE = 16439 + + +class ECQuestion(enum.Enum): + EC_PROJECT = 16453 + + +class ChargingStructure(enum.Enum): + """ + 
Answers are strings + """ + charging_structure = 16410 + + +def query_budget(): + return db.session.execute(text(BUDGET_QUERY), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]}) + + +def recursive_query(question_id_2022): + assert question_id_2022 + query = RECURSIVE_QUERY.format(question_id_2022) + return db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]}) + + +def query_funding_sources(): + for source in FundingSource: + query = QUESTION_TEMPLATE_QUERY.format(source.value, 2022) + yield source, db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]}) + + +def query_question(question: enum.Enum): + return query_question_id(question.value) + + +def query_question_id(question_id: int, year: int = 2022): + query = QUESTION_TEMPLATE_QUERY.format(question_id, year) + return db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]}) + + +def _parse_json_urls(value, nren_name): + if value and not value.startswith('['): + value = f'[{value}]' + + try: + return [url.strip().strip('/') for url in json.loads(value) if url.strip()] + except json.decoder.JSONDecodeError: + logger.info(f'JSON decode error for urls for {nren_name}.') + return [] + + +def transfer_budget(nren_dict): + rows = query_budget() + db.session.execute(delete(presentation_models.BudgetEntry).where( + presentation_models.BudgetEntry.year == 2022) + ) + for row in rows: + nren_name = row[0].upper() + _budget = row[1] + try: + budget = float(_budget.replace('"', '').replace(',', '')) + except ValueError: + logger.info(f'{nren_name} has no budget for 2022. Skipping. ({_budget}))') + continue + + if budget > 400: + logger.info(f'{nren_name} has budget set to >400M EUR for 2022. ({budget})') + continue + + if budget == 0: + logger.info(f'{nren_name} has budget set to 0 for 2022. Skipping.') + continue + + if nren_name not in nren_dict: + logger.info(f'{nren_name} unknown. 
Skipping.') + continue + + budget_entry = presentation_models.BudgetEntry( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + budget=budget, + year=2022, + ) + db.session.merge(budget_entry) + db.session.commit() + + +def transfer_institutions_urls(nren_dict): + + # for this data, nothing comes from the excel publisher, so we can delete all + db.session.execute(delete(presentation_models.InstitutionURLs).where( + presentation_models.InstitutionURLs.year <= 2022) + ) + + rows = recursive_query(16507) + + for row in rows: + answer_id, nren_name, year, answer = row + if nren_name not in nren_dict: + logger.info(f'{nren_name} unknown. Skipping.') + continue + + urls = extract_urls(text=answer) + urls_json = _parse_json_urls(answer, nren_name) + if urls != urls_json: + logger.info(f'Institution URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}') + + if not urls: + logger.info(f'{nren_name} has no urls for {year}. Skipping.') + continue + + valid_urls = [] + + for url in urls: + if not valid_url(url): + logger.warning(f'Invalid institution URL for {nren_name} in {year}: {url}. Skipping.') + continue + valid_urls.append(url) + + if not valid_urls: + logger.info(f'{nren_name} has no valid urls for {year}. Skipping.') + continue + + institution_urls = presentation_models.InstitutionURLs( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + urls=valid_urls, + year=year, + ) + db.session.merge(institution_urls) + + db.session.commit() + + +def transfer_funding_sources(nren_dict): + sourcedata = {} + db.session.execute(delete(presentation_models.FundingSource).where( + presentation_models.FundingSource.year == 2022) + ) + for source, data in query_funding_sources(): + for row in data: + nren_name = row[0].upper() + _value = row[1] + try: + value = float(_value.replace('"', '').replace(',', '')) + except ValueError: + name = source.name + logger.info(f'{nren_name} has invalid value for {name}. 
({_value}))') + value = 0 + + nren_info = sourcedata.setdefault( + nren_name, + {source_type: 0 for source_type in FundingSource} + ) + nren_info[source] = value + + for nren_name, nren_info in sourcedata.items(): + total = sum(nren_info.values()) + + if not math.isclose(total, 100, abs_tol=0.01): + logger.info(f'{nren_name} funding sources do not sum to 100%. ({total})') + continue + + if nren_name not in nren_dict: + logger.info(f'{nren_name} unknown. Skipping.') + continue + + if nren_name == 'HEANET': + nren_info[FundingSource.OTHER] = nren_info[FundingSource.OTHER] + nren_info[FundingSource.COMMERCIAL] + nren_info[FundingSource.COMMERCIAL] = 0 + + funding_source = presentation_models.FundingSource( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + year=2022, + client_institutions=nren_info[FundingSource.CLIENT_INSTITUTIONS], + european_funding=nren_info[FundingSource.EUROPEAN_FUNDING], + gov_public_bodies=nren_info[FundingSource.GOV_PUBLIC_BODIES], + commercial=nren_info[FundingSource.COMMERCIAL], + other=nren_info[FundingSource.OTHER], + ) + db.session.merge(funding_source) + db.session.commit() + + +def transfer_staff_data(nren_dict): + data = {} + db.session.execute(delete(presentation_models.NrenStaff).where( + presentation_models.NrenStaff.year == 2022) + ) + for question in StaffQuestion: + rows = query_question(question) + for row in rows: + nren_name = row[0].upper() + _value = row[1] + try: + value = float(_value.replace('"', '').replace(',', '')) + except ValueError: + value = 0 + + if nren_name not in nren_dict: + logger.info(f'{nren_name} unknown. Skipping.') + continue + + # initialize on first use, so we don't add data for nrens with no answers + data.setdefault(nren_name, {question: 0 for question in StaffQuestion})[question] = value + + for nren_name, nren_info in data.items(): + if sum([nren_info[question] for question in StaffQuestion]) == 0: + logger.info(f'{nren_name} has no staff data. 
def transfer_nren_parent_org(nren_dict):
    """Copy the 2022 parent-organization answers into the presentation model."""
    # clean up the data a bit by removing some boilerplate phrases
    strings_to_replace = [
        'We are affiliated to '
    ]

    db.session.execute(delete(presentation_models.ParentOrganization).where(
        presentation_models.ParentOrganization.year == 2022)
    )

    for row in query_question(OrgQuestion.PARENT_ORG_NAME):
        nren_name = row[0].upper()
        value = str(row[1]).replace('"', '')
        if not value:
            continue

        for fragment in strings_to_replace:
            value = value.replace(fragment, '')

        if nren_name not in nren_dict:
            logger.info(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.ParentOrganization(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=2022,
            organization=value,
        ))
    db.session.commit()
def transfer_nren_sub_org(nren_dict):
    """Copy the 2022 sub-organization answers (name/relationship/role) into the presentation model."""
    suborg_questions = [
        (OrgQuestion.SUB_ORGS_1_NAME, OrgQuestion.SUB_ORGS_1_CHOICE, OrgQuestion.SUB_ORGS_1_ROLE),
        (OrgQuestion.SUB_ORGS_2_NAME, OrgQuestion.SUB_ORGS_2_CHOICE, OrgQuestion.SUB_ORGS_2_ROLE),
        (OrgQuestion.SUB_ORGS_3_NAME, OrgQuestion.SUB_ORGS_3_CHOICE, OrgQuestion.SUB_ORGS_3_ROLE),
        (OrgQuestion.SUB_ORGS_4_NAME, OrgQuestion.SUB_ORGS_4_CHOICE, OrgQuestion.SUB_ORGS_4_ROLE),
        (OrgQuestion.SUB_ORGS_5_NAME, OrgQuestion.SUB_ORGS_5_CHOICE, OrgQuestion.SUB_ORGS_5_ROLE)
    ]
    lookup = defaultdict(list)

    db.session.execute(delete(presentation_models.SubOrganization).where(
        presentation_models.SubOrganization.year == 2022)
    )

    for name, choice, role in suborg_questions:
        name_rows = query_question(name)
        choice_rows = query_question(choice)
        role_rows = list(query_question(role))
        for _name, _choice in zip(name_rows, choice_rows):
            nren_name = _name[0].upper()
            suborg_name = _name[1].replace('"', '').strip()
            role_choice = _choice[1].replace('"', '').strip()

            if nren_name not in nren_dict:
                logger.info(f'{nren_name} unknown. Skipping.')
                continue

            if role_choice.lower() == 'other':
                # "other": use the free-text role answer for the same NREN
                # NOTE(review): if no matching row is found, `role` keeps its value
                # from the enclosing loop — confirm that this is intended
                for _role in role_rows:
                    if _role[0] == _name[0]:
                        role = _role[1].replace('"', '').strip()
                        break
            else:
                role = role_choice

            if not role:
                continue

            lookup[nren_name].append((suborg_name, role))

    for nren_name, suborgs in lookup.items():
        for suborg_name, role in suborgs:
            db.session.merge(presentation_models.SubOrganization(
                nren=nren_dict[nren_name],
                nren_id=nren_dict[nren_name].id,
                year=2022,
                organization=suborg_name,
                role=role,
            ))
    db.session.commit()
def transfer_charging_structure(nren_dict):
    """Copy the 2022 charging-structure answers into the presentation model."""
    db.session.execute(delete(presentation_models.ChargingStructure).where(
        presentation_models.ChargingStructure.year == 2022)
    )

    # checked in order; first matching substring wins ("Other" is case-sensitive)
    matchers = [
        ("do not charge", FeeType.no_charge),
        ("combination", FeeType.combination),
        ("flat", FeeType.flat_fee),
        ("usage-based", FeeType.usage_based_fee),
        ("Other", FeeType.other),
    ]

    for row in query_question(ChargingStructure.charging_structure):
        nren_name = row[0].upper()
        value = row[1].replace('"', '').strip()

        if nren_name not in nren_dict:
            logger.info(f'{nren_name} unknown. Skipping from charging structure.')
            continue

        # no match at all stores a NULL fee type, as before
        fee_type = next((fee for needle, fee in matchers if needle in value), None)

        db.session.merge(presentation_models.ChargingStructure(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=2022,
            fee_type=fee_type,
        ))
    db.session.commit()


def transfer_ec_projects(nren_dict):
    """Copy the 2022 EC-project answers into the presentation model."""
    # delete all existing EC projects, in case something changed
    db.session.execute(
        delete(presentation_models.ECProject).where(presentation_models.ECProject.year == 2022)
    )

    for row in query_question(ECQuestion.EC_PROJECT):
        nren_name = row[0].upper()
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        try:
            value = json.loads(row[1])
        except json.decoder.JSONDecodeError:
            logger.info(f'JSON decode error for EC project data for {nren_name}. Skipping.')
            continue

        for val in value:
            if not val:
                logger.info(f'Empty EC project value for {nren_name}.')
                continue

            # strip html entities/NBSP, then drop trailing contract numbers
            cleaned = html.unescape(val).replace('\xa0', ' ').split('(contract n')[0]

            db.session.add(presentation_models.ECProject(
                nren=nren_dict[nren_name],
                nren_id=nren_dict[nren_name].id,
                year=2022,
                project=str(cleaned).strip()
            ))
    db.session.commit()
def transfer_policies(nren_dict):
    """
    Answers are strings that should be urls, but sometimes there's other stuff
    like email addresses or random text
    """

    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.Policy).where(
        presentation_models.Policy.year <= 2022)
    )

    # per policy kind: the survey question id used in each year
    policy_questions = {
        'strategy': {2022: 16469, 2021: 16064, 2020: 15720, 2019: 15305, 2018: 14910},
        'environment': {2022: 16471, 2021: 16066, 2020: 15722, 2019: 15307, 2018: 14912},
        'equality': {2022: 16473, 2021: 16378},
        'connectivity': {2022: 16475, 2021: 16068, 2020: 15724, 2019: 15309, 2018: 14914},
        'acceptable_use': {2022: 16477, 2021: 16070, 2020: 15726, 2019: 15311, 2018: 14916},
        'privacy': {2022: 16479, 2021: 16072, 2020: 15728, 2019: 15575},
        'data_protection': {2022: 16481, 2021: 16074, 2020: 15730, 2019: 15577},
        'gender': {2022: 16761}
    }

    data = {}
    for year in [2018, 2019, 2020, 2021, 2022]:
        policy_questions_year = {key: years[year] for key, years in policy_questions.items() if year in years}
        for question_key, question_id in policy_questions_year.items():
            rows = query_question_id(question_id, year)
            for row in rows:
                nren_name = row[0].upper()
                _value = row[1]

                if nren_name not in nren_dict:
                    logger.warning(f'{nren_name} unknown. Skipping.')
                    continue

                # take the first whitespace-separated token; guard against
                # empty/whitespace-only answers, where `_value.split()[0]`
                # previously raised IndexError
                parts = _value.split()
                value = parts[0].strip('"') if parts else ''

                if not value:
                    # don't warn on empty answers, just skip
                    continue

                if value.upper() == 'N.A.' or ('.' not in value and '@' not in value):
                    # this test is a bit silly but does seem to filter out all the nonsense responses
                    logger.warning(f'"{value}" does not look like an email address or link. Skipping.')
                    continue

                if _value not in [f'"{value}"', value]:
                    logger.info(f'Cleaned policy answer: "{_value}" became "{value}"')

                # initialize on first use, so we don't add data for nrens with no answers
                data.setdefault((nren_name, year), {q: '' for q in policy_questions.keys()})
                data[(nren_name, year)][question_key] = value

    def _checked(url):
        # keep only answers that survive url validation; everything else becomes ''
        return url if url and valid_url(url) else ''

    for (nren_name, year), nren_info in data.items():
        strategy = _checked(nren_info['strategy'])
        environment = _checked(nren_info['environment'])
        equality = _checked(nren_info['equality'])
        connectivity = _checked(nren_info['connectivity'])
        acceptable_use = _checked(nren_info['acceptable_use'])
        privacy = _checked(nren_info['privacy'])
        data_protection = _checked(nren_info['data_protection'])
        gender_equality = _checked(nren_info['gender'])

        all_policies = [strategy, environment, equality, connectivity,
                        acceptable_use, privacy, data_protection, gender_equality]
        if not any(all_policies):
            logger.info(f'{nren_name} has no policy data. Skipping.')
            continue

        db.session.merge(presentation_models.Policy(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            strategic_plan=strategy,
            environmental=environment,
            equal_opportunity=equality,
            connectivity=connectivity,
            acceptable_use=acceptable_use,
            privacy_notice=privacy,
            data_protection=data_protection,
            gender_equality=gender_equality
        ))
    db.session.commit()
def transfer_central_procurement(nren_dict):
    """Copy central-procurement answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.CentralProcurement).where(
        presentation_models.CentralProcurement.year <= 2022)
    )

    amounts = {
        (nren_name, year): Decimal(answer.strip('"'))
        for answer_id, nren_name, year, answer in recursive_query(16483)
    }

    for answer_id, nren_name, year, answer in recursive_query(16482):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.CentralProcurement(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            central_procurement=answer == '"Yes"',
            amount=amounts.get((nren_name, year))
        ))
    db.session.commit()


def transfer_service_management(nren_dict):
    """Copy service-management framework / service-level target answers (<= 2022)."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.ServiceManagement).where(
        presentation_models.ServiceManagement.year <= 2022)
    )

    framework = {(nren_name, year): answer == '"Yes"'
                 for answer_id, nren_name, year, answer in recursive_query(16484)}
    targets = {(nren_name, year): answer == '"Yes"'
               for answer_id, nren_name, year, answer in recursive_query(16485)}

    # union of keys: an NREN may have answered only one of the two questions
    for nren_name, year in framework.keys() | targets.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.ServiceManagement(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            service_management_framework=framework.get((nren_name, year)),
            service_level_targets=targets.get((nren_name, year))
        ))
    db.session.commit()
def transfer_service_user_types(nren_dict):
    """Copy the per-service-category user-type answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.ServiceUserTypes).where(
        presentation_models.ServiceUserTypes.year <= 2022)
    )

    categories = [
        (ServiceCategory.identity, 16488),
        (ServiceCategory.network_services, 16489),
        (ServiceCategory.collaboration, 16490),
        (ServiceCategory.security, 16491),
        (ServiceCategory.isp_support, 16492),
        (ServiceCategory.storage_and_hosting, 16493),
        (ServiceCategory.multimedia, 16494),
        (ServiceCategory.professional_services, 16495)
    ]
    for service_category, question_id in categories:
        for answer_id, nren_name, year, answer in recursive_query(question_id):
            if nren_name not in nren_dict:
                logger.warning(f'{nren_name} unknown. Skipping.')
                continue

            # each answer is a json list of user-category labels
            for user_cat_db in json.loads(answer):
                user_cat = UserCategory[SERVICE_USER_TYPE_TO_CODE[user_cat_db]]
                db.session.merge(presentation_models.ServiceUserTypes(
                    nren=nren_dict[nren_name],
                    nren_id=nren_dict[nren_name].id,
                    year=year,
                    user_category=user_cat,
                    service_category=service_category
                ))
    db.session.commit()
def transfer_standards(nren_dict):
    """Copy audit / business-continuity / crisis-management answers (<= 2022)."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.Standards).where(
        presentation_models.Standards.year <= 2022)
    )

    def _bool_answers(question_id):
        # (nren, year) -> True iff the stored answer was a quoted "Yes"
        return {(nren_name, year): answer == '"Yes"'
                for answer_id, nren_name, year, answer in recursive_query(question_id)}

    def _text_answers(question_id):
        # (nren, year) -> answer with the surrounding quotes removed
        return {(nren_name, year): answer.strip('"')
                for answer_id, nren_name, year, answer in recursive_query(question_id)}

    audits = _bool_answers(16499)
    audit_specifics = _text_answers(16500)
    bcp = _bool_answers(16501)
    bcp_specifics = _text_answers(16502)
    cmp = _bool_answers(16762)

    # union of keys: any NREN that answered at least one of the questions
    for nren_name, year in audits.keys() | audit_specifics.keys() | bcp.keys() | bcp_specifics.keys() | cmp.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.Standards(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            audits=audits.get((nren_name, year)),
            audit_specifics=audit_specifics.get((nren_name, year), ""),
            business_continuity_plans=bcp.get((nren_name, year)),
            business_continuity_plans_specifics=bcp_specifics.get((nren_name, year), ""),
            crisis_management_procedure=cmp.get((nren_name, year))
        ))
    db.session.commit()
def transfer_crisis_exercises(nren_dict):
    """Copy crisis-exercise answers (<= 2022), mapping survey free text back to short codes."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.CrisisExercises).where(
        presentation_models.CrisisExercises.year <= 2022)
    )

    # code -> survey answer text (spelling/trailing spaces match the survey exactly)
    crisis_exercises_map = {
        "geant_workshops": "We participate in GEANT Crisis workshops such as CLAW",
        "national_excercises": "We participated in National crisis exercises ",
        "tabletop_exercises": "We run our own tabletop exercises",
        "simulation_excercises": "We run our own simulation exercises",
        "other_excercises": "We have done/participated in other exercises or trainings",
        "real_crisis": "We had a real crisis",
        "internal_security_programme": "We run an internal security awareness programme",
        "none": "No, we have not done any crisis exercises or trainings",
    }
    _reversed_map = {v: k for k, v in crisis_exercises_map.items()}

    for answer_id, nren_name, year, answer in recursive_query(16763):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        descriptions = json.loads(answer)
        # unknown descriptions are stored verbatim
        db.session.merge(presentation_models.CrisisExercises(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            exercise_descriptions=[_reversed_map.get(desc, desc) for desc in descriptions]
        ))
    db.session.commit()
def transfer_security_controls(nren_dict):
    """Copy security-control answers (2021 onward) into the presentation model."""
    controls_map = {
        "anti_virus": "Anti Virus",
        "anti_spam": "Anti-Spam",
        "firewall": "Firewall",
        "ddos_mitigation": "DDoS mitigation",
        "monitoring": "Network monitoring",
        "ips_ids": "IPS/IDS",
        "acl": "ACL",
        "segmentation": "Network segmentation",
        "integrity_checking": "Integrity checking"
    }
    reversed_map = {v: k for k, v in controls_map.items()}

    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.SecurityControls).where(
        presentation_models.SecurityControls.year <= 2022)
    )

    sc = {(nren_name, year): json.loads(answer)
          for answer_id, nren_name, year, answer in recursive_query(16503)}
    sc_other = {(nren_name, year): json.loads(answer)
                for answer_id, nren_name, year, answer in recursive_query(16504)}
    # the free-text answer is sometimes a bare string instead of a list
    for key, value in sc_other.items():
        if not isinstance(value, list):
            sc_other[key] = [value]

    for nren_name, year in sc.keys() | sc_other.keys():
        # earlier years use a different mapping; sourced elsewhere
        # NOTE(review): the cutoff is `< 2021` while the TODO mentions pre-2022 — confirm
        if year < 2021:
            continue

        # TODO: import the pre-2022 data from a handmade CSV.

        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        full_list = sc.get((nren_name, year), [])
        other_entries = [e.strip() for e in sc_other.get((nren_name, year), [])
                         if e.strip() and e.lower() not in ["n/a", "-"]]
        other_entry = ", ".join(other_entries)
        if other_entry:
            full_list.append(other_entry)
        if "Other" in full_list:
            full_list.remove("Other")

        full_list = [reversed_map.get(control, control) for control in full_list]

        if full_list:
            db.session.merge(presentation_models.SecurityControls(
                nren=nren_dict[nren_name],
                nren_id=nren_dict[nren_name].id,
                year=year,
                security_control_descriptions=full_list
            ))
    db.session.commit()
def transfer_eosc_listings(nren_dict):
    """Copy EOSC service-listing answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.EOSCListings).where(
        presentation_models.EOSCListings.year <= 2022)
    )

    for answer_id, nren_name, year, answer in recursive_query(16497):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        # the answer is a json list; drop empty entries
        db.session.merge(presentation_models.EOSCListings(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            service_names=[name for name in json.loads(answer) if name]
        ))
    db.session.commit()
def transfer_commercial_connectivity(nren_dict):
    """Copy commercial-connectivity coverage answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.CommercialConnectivity).where(
        presentation_models.CommercialConnectivity.year <= 2022)
    )

    simple_connection = {
        key.replace(" ", "").replace("-", "").replace("/", "").lower(): value for key, value in CONNECTION.items()
    }

    def get_coverage(db_string):
        # normalize the answer the same way the lookup keys were normalized
        cleaned_str = db_string.strip('"').replace(" ", "").replace("-", "").replace("/", "").lower()
        return CommercialConnectivityCoverage[simple_connection[cleaned_str]]

    def _coverage_answers(question_id):
        return {(nren_name, year): get_coverage(answer)
                for answer_id, nren_name, year, answer in recursive_query(question_id)}

    sp = _coverage_answers(16646)
    collab = _coverage_answers(16647)
    r_e = _coverage_answers(16648)
    general = _coverage_answers(16649)
    spin_off = _coverage_answers(16650)

    # union of keys: an NREN may have answered any subset of the five questions
    for nren_name, year in sp.keys() | collab.keys() | r_e.keys() | general.keys() | spin_off.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.CommercialConnectivity(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            commercial_r_and_e=r_e.get((nren_name, year)),
            commercial_general=general.get((nren_name, year)),
            commercial_collaboration=collab.get((nren_name, year)),
            commercial_service_provider=sp.get((nren_name, year)),
            university_spin_off=spin_off.get((nren_name, year))
        ))
    db.session.commit()
def transfer_commercial_charging_level(nren_dict):
    """Copy commercial charging-level answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.CommercialChargingLevel).where(
        presentation_models.CommercialChargingLevel.year <= 2022)
    )

    simple_charging = {
        key.replace(" ", "").replace("-", "").replace("/", "").lower(): value for key, value in CHARGING_LEVELS.items()
    }
    # known answer variants that should map to the same charge level
    simple_charging["nochargesapplied"] = "no_charges_if_r_e_requested"
    simple_charging['nochargesappliedifrequestedbyr+eusers\\"needed?'] = "no_charges_if_r_e_requested"

    def get_charging(db_string):
        # some answers arrive as a single-element json list; unwrap first
        if db_string[0] == '[':
            db_string = json.loads(db_string)[0]
        cleaned_str = db_string.strip('"').replace(" ", "").replace("-", "").replace("/", "").lower()
        return CommercialCharges[simple_charging[cleaned_str]]

    def _charging_answers(question_id):
        return {(nren_name, year): get_charging(answer)
                for answer_id, nren_name, year, answer in recursive_query(question_id)}

    collab = _charging_answers(16652)
    services = _charging_answers(16653)
    peering = _charging_answers(16654)

    for nren_name, year in collab.keys() | services.keys() | peering.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.CommercialChargingLevel(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            collaboration=collab.get((nren_name, year)),
            service_supplier=services.get((nren_name, year)),
            direct_peering=peering.get((nren_name, year))
        ))
    db.session.commit()
def transfer_fibre_light(nren_dict):
    """Copy dark-fibre lighting answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.FibreLight).where(
        presentation_models.FibreLight.year <= 2022)
    )

    # default to {} so a missing question id cannot make comment_map None and
    # crash the `.get` lookup below
    comment_map = mapping.VALUE_TO_CODE_MAPPING.get(16668, {})

    fibre = {(nren_name, year): answer.strip('"')
             for answer_id, nren_name, year, answer in recursive_query(16668)}
    fibre_comment = {(nren_name, year): answer.strip('"')
                     for answer_id, nren_name, year, answer in recursive_query(16669)}

    for nren_name, year in fibre.keys() | fibre_comment.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        description = fibre.get((nren_name, year))
        comment = fibre_comment.get((nren_name, year))
        if description and description[0:5] != "Other":
            # a non-"Other" description should not also carry a free-text comment
            if comment and comment.replace("-", "") != "":
                logger.warning(
                    f'fibre light comment while description is not "Other": {description} {comment} {nren_name}.'
                )
        else:
            # "Other" (or no description): the free-text comment is the answer
            description = comment

        if description:
            description = comment_map.get(description, description).replace("\\", "")
            db.session.merge(presentation_models.FibreLight(
                nren=nren_dict[nren_name],
                nren_id=nren_dict[nren_name].id,
                year=year,
                light_description=description
            ))
    db.session.commit()
def transfer_network_map_urls(nren_dict):
    """Copy network-map url answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.NetworkMapUrls).where(
        presentation_models.NetworkMapUrls.year <= 2022)
    )

    for answer_id, nren_name, year, answer in recursive_query(16670):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        # cross-check the regex extraction against the json parse; log disagreements
        urls = extract_urls(text=answer)
        urls_json = _parse_json_urls(answer, nren_name)
        if urls != urls_json:
            logger.info(f'Institution URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}')

        if not urls:
            logger.info(f'{nren_name} has no urls for {year}. Skipping.')
            continue

        db.session.merge(presentation_models.NetworkMapUrls(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            urls=urls
        ))
    db.session.commit()


def transfer_traffic_statistics(nren_dict):
    """Copy traffic-statistics answers and their urls (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.TrafficStatistics).where(
        presentation_models.TrafficStatistics.year <= 2022)
    )

    stat_urls = {(nren_name, year): answer
                 for answer_id, nren_name, year, answer in recursive_query(16678)}

    for answer_id, nren_name, year, answer in recursive_query(16677):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        raw_urls = stat_urls.get((nren_name, year))
        if raw_urls:
            urls = extract_urls(text=raw_urls)
            urls_json = _parse_json_urls(raw_urls, nren_name)
            if urls != urls_json:
                logger.info(
                    f'Traffic stat URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}'
                )
            db_urls = urls
        else:
            db_urls = []

        valid_urls = []
        for url in db_urls:
            if valid_url(url):
                valid_urls.append(url)
            else:
                logger.warning(f'Invalid Traffic Statistics URL for {nren_name} in {year}: {url}. Skipping.')

        if not valid_urls:
            logger.info(f'{nren_name} has no valid urls for {year}. Skipping.')
            continue

        db.session.merge(presentation_models.TrafficStatistics(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            traffic_statistics=answer == '"Yes"',
            urls=db_urls
        ))
    db.session.commit()
def transfer_siem_vendors(nren_dict):
    """Copy SIEM vendor answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.SiemVendors).where(
        presentation_models.SiemVendors.year <= 2022)
    )

    vendors = {(nren_name, year): json.loads(answer)
               for answer_id, nren_name, year, answer in recursive_query(16679)}
    vendor_comment = {(nren_name, year): answer.strip('"')
                      for answer_id, nren_name, year, answer in recursive_query(16680)}

    for nren_name, year in vendors.keys() | vendor_comment.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        vendor_names = vendors.get((nren_name, year), [])
        comment = vendor_comment.get((nren_name, year))
        if comment:
            vendor_names.append(comment)
            # the free-text comment replaces the "Other" placeholder; guard the
            # removal so a comment without a matching "Other" entry cannot raise
            # ValueError (same pattern as transfer_certificate_providers)
            if "Other" in vendor_names:
                vendor_names.remove("Other")

        db.session.merge(presentation_models.SiemVendors(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            vendor_names=vendor_names
        ))
    db.session.commit()
def transfer_certificate_providers(nren_dict):
    """Copy certificate-provider answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.CertificateProviders).where(
        presentation_models.CertificateProviders.year <= 2022)
    )

    provider_map = mapping.VALUE_TO_CODE_MAPPING.get(16681)

    providers = {(nren_name, year): json.loads(answer)
                 for answer_id, nren_name, year, answer in recursive_query(16681)}
    prov_comment = {(nren_name, year): answer.strip('"')
                    for answer_id, nren_name, year, answer in recursive_query(16682)}

    def _replace_provider(provider):
        # normalize the many spellings of Let's Encrypt, then map known values to codes
        if 'let' in provider.lower() and 'encrypt' in provider.lower():
            return "Let's Encrypt"
        return provider_map.get(provider, provider)

    for nren_name, year in providers.keys() | prov_comment.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        provider_names = providers.get((nren_name, year), [])
        comment = prov_comment.get((nren_name, year))
        if comment:
            # the free-text comment replaces the "Other" placeholder
            provider_names.append(comment)
            if "Other" in provider_names:
                provider_names.remove("Other")

        db.session.merge(presentation_models.CertificateProviders(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            provider_names=[_replace_provider(p) for p in provider_names]
        ))
    db.session.commit()
def transfer_weather_map(nren_dict):
    """Copy weather-map answers and their url (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.WeatherMap).where(
        presentation_models.WeatherMap.year <= 2022)
    )

    urls = {(nren_name, year): answer.strip('" ')
            for answer_id, nren_name, year, answer in recursive_query(16684)}

    for answer_id, nren_name, year, answer in recursive_query(16683):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        raw = urls.get((nren_name, year), "")
        url = ""
        if raw:
            # keep only the first url the extractor finds in the raw answer
            found_urls = extract_urls(text=raw)
            if found_urls:
                url = found_urls[0]

        orig_url = raw.strip("/")
        if url != orig_url:
            logger.info(f'Weather URL for {nren_name} do not match between json and regex. {url} != {orig_url}')

        if not valid_url(url):
            logger.warning(f'Invalid WeatherMap URL for {nren_name} in {year}: {url}. Skipping.')
            continue

        db.session.merge(presentation_models.WeatherMap(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            weather_map=answer == '"Yes"',
            url=url
        ))
    db.session.commit()
def transfer_pert_team(nren_dict):
    """Copy PERT-team answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.PertTeam).where(
        presentation_models.PertTeam.year <= 2022)
    )

    for answer_id, nren_name, year, answer in recursive_query(16685):
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue
        # unanswered questions are stored as the literal string "null"
        if answer == "null":
            continue

        db.session.merge(presentation_models.PertTeam(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            pert_team=YesNoPlanned[answer.strip('"').lower()]
        ))
    db.session.commit()


def transfer_alien_wave(nren_dict):
    """Copy alien-wave usage answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.AlienWave).where(
        presentation_models.AlienWave.year <= 2022)
    )

    alien = {(nren_name, year): YesNoPlanned[answer.strip('"').lower()]
             for answer_id, nren_name, year, answer in recursive_query(16687)}
    nr = {(nren_name, year): int(answer.strip('"'))
          for answer_id, nren_name, year, answer in recursive_query(16688)}
    internal = {(nren_name, year): answer == '"Yes"'
                for answer_id, nren_name, year, answer in recursive_query(16689)}

    # union of keys: an NREN may have answered any subset of the three questions
    for nren_name, year in alien.keys() | nr.keys() | internal.keys():
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        db.session.merge(presentation_models.AlienWave(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            alien_wave_third_party=alien.get((nren_name, year)),
            nr_of_alien_wave_third_party_services=nr.get((nren_name, year)),
            alien_wave_internal=internal.get((nren_name, year))
        ))
    db.session.commit()
def transfer_external_connections(nren_dict):
    """Copy the external-connection link answers (<= 2022) into the presentation model."""
    # for this data, nothing comes from the excel publisher, so we can delete all
    db.session.execute(delete(presentation_models.ExternalConnections).where(
        presentation_models.ExternalConnections.year <= 2022)
    )

    # question id -> (connection slot 0-9, field of that connection)
    question_nrs = {
        16694: (5, "capacity"),
        16695: (7, "capacity"),
        16696: (6, "capacity"),
        16697: (7, "from_organization"),
        16698: (1, "to_organization"),
        16699: (8, "to_organization"),
        16700: (9, "to_organization"),
        16701: (1, "from_organization"),
        16702: (8, "capacity"),
        16703: (5, "to_organization"),
        16704: (0, "link_name"),
        16705: (1, "link_name"),
        16706: (9, "capacity"),
        16707: (2, "link_name"),
        16708: (0, "from_organization"),
        16709: (4, "link_name"),
        16710: (3, "link_name"),
        16711: (9, "link_name"),
        16712: (7, "link_name"),
        16713: (8, "link_name"),
        16714: (6, "link_name"),
        16715: (5, "link_name"),
        16716: (4, "from_organization"),
        16717: (5, "from_organization"),
        16718: (6, "from_organization"),
        16719: (2, "to_organization"),
        16720: (3, "to_organization"),
        16721: (4, "to_organization"),
        16722: (6, "to_organization"),
        16723: (7, "to_organization"),
        16724: (2, "interconnection_method"),
        16725: (3, "interconnection_method"),
        16726: (4, "interconnection_method"),
        16727: (5, "interconnection_method"),
        16728: (8, "from_organization"),
        16729: (9, "from_organization"),
        16730: (0, "to_organization"),
        16731: (0, "capacity"),
        16732: (1, "capacity"),
        16733: (2, "capacity"),
        16734: (3, "capacity"),
        16735: (4, "capacity"),
        16736: (3, "from_organization"),
        16737: (2, "from_organization"),
        16738: (1, "interconnection_method"),
        16739: (7, "interconnection_method"),
        16740: (8, "interconnection_method"),
        16741: (0, "interconnection_method"),
        16742: (9, "interconnection_method"),
        16743: (6, "interconnection_method")
    }

    def empty_connection_dict():
        return {'link_name': '', 'capacity': None, 'from_organization': '',
                'to_organization': '', 'interconnection_method': None}

    connection_dicts = {}
    nren_year_set = set()
    for question_id, (connection_nr, field) in question_nrs.items():
        for answer_id, nren_name, year, answer in recursive_query(question_id):
            nren_year_set.add((nren_name, year))
            conn_dict = connection_dicts.setdefault((nren_name, year, connection_nr), empty_connection_dict())
            conn_dict[field] = answer.strip('" ')

    int_simple = {key.replace(" ", "").lower(): value for key, value in INTERCONNECTION.items()}
    int_simple['openexchangepoi'] = "open_exchange"

    # iterate items() so warnings name the NREN the data actually belongs to;
    # previously `nren_name` leaked from the loop above and could report the
    # wrong NREN. The bare `except:` is narrowed to ArithmeticError, which
    # covers decimal.InvalidOperation for unparsable capacities.
    for (nren_name, year, connection_nr), conn_dict in connection_dicts.items():
        if conn_dict['capacity']:
            try:
                conn_dict['capacity'] = str(Decimal(conn_dict['capacity'].split('G')[0].strip()))
            except ArithmeticError:
                logger.warning(f'Capacity could not be converted for {nren_name}: {conn_dict["capacity"]}.')
                conn_dict['capacity'] = None
        if conn_dict['interconnection_method']:
            int_conn = int_simple[conn_dict['interconnection_method'].replace(" ", "").lower()]
            conn_dict['interconnection_method'] = ConnectionMethod[int_conn].value

    for nren_name, year in nren_year_set:
        if nren_name not in nren_dict:
            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        # keep slot order 0-9, skipping slots with no answers at all
        connections = [connection_dicts[(nren_name, year, slot)]
                       for slot in range(0, 10)
                       if (nren_name, year, slot) in connection_dicts]

        db.session.merge(presentation_models.ExternalConnections(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            connections=connections
        ))
    db.session.commit()
Skipping.') + continue + + connections = [] + for connection_nr in range(0, 10): + conn = connection_dicts.get((nren_name, year, connection_nr)) + if conn: + connections.append(conn) + + new_entry = presentation_models.ExternalConnections( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + year=year, + connections=connections + ) + db.session.merge(new_entry) + db.session.commit() + + +def transfer_network_automation(nren_dict): + + # for this data, nothing comes from the excel publisher, so we can delete all + db.session.execute(delete(presentation_models.NetworkAutomation).where( + presentation_models.NetworkAutomation.year <= 2022) + ) + + network_automation_map = mapping.VALUE_TO_CODE_MAPPING.get(16758) + + rows = recursive_query(16757) + tasks = recursive_query(16758) + tasks = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in tasks} + + for answer_id, nren_name, year, answer in rows: + if nren_name not in nren_dict: + logger.warning(f'{nren_name} unknown. 
Skipping.') + continue + + network_automation = YesNoPlanned[answer.strip('"').lower()] + specifics = tasks.get((nren_name, year), []) + specifics = [network_automation_map.get(s, s) for s in specifics if s] + + new_entry = presentation_models.NetworkAutomation( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + year=year, + network_automation=network_automation, + network_automation_specifics=specifics + ) + db.session.merge(new_entry) + db.session.commit() + + +def transfer_network_function_virtualisation(nren_dict): + + # for this data, nothing comes from the excel publisher, so we can delete all + db.session.execute(delete(presentation_models.NetworkFunctionVirtualisation).where( + presentation_models.NetworkFunctionVirtualisation.year <= 2022) + ) + + nfv_map = mapping.VALUE_TO_CODE_MAPPING.get(16755) + rows = recursive_query(16754) + types = recursive_query(16755) + types = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in types} + types_comment = recursive_query(16756) + types_comment = {(nren_name, year): answer.strip('" ') for answer_id, nren_name, year, answer in types_comment} + + for answer_id, nren_name, year, answer in rows: + if nren_name not in nren_dict: + logger.warning(f'{nren_name} unknown. 
Skipping.') + continue + + nfv = YesNoPlanned[answer.strip('"').lower()] + specifics = types.get((nren_name, year), []) + specifics = list(itertools.chain(*[s.split(', ') for s in specifics if s])) + comment = types_comment.get((nren_name, year), "").replace("-", "") + if comment: + specifics.append(comment) + if "Other" in specifics: + specifics.remove("Other") + + converted_specifics = [] + for specific in specifics: + converted_specifics.append(nfv_map.get(specific, specific)) + + new_entry = presentation_models.NetworkFunctionVirtualisation( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + year=year, + nfv=nfv, + nfv_specifics=converted_specifics + ) + db.session.merge(new_entry) + db.session.commit() + + +def transfer_monitoring_tools(nren_dict): + + # for this data, nothing comes from the excel publisher, so we can delete all + db.session.execute(delete(presentation_models.MonitoringTools).where( + presentation_models.MonitoringTools.year <= 2022) + ) + + description_map = mapping.VALUE_TO_CODE_MAPPING.get(16672) + + tools = recursive_query(16672) + tools = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in tools} + tools_comment = recursive_query(16673) + tools_comment = {(nren_name, year): answer.strip('" ') for answer_id, nren_name, year, answer in tools_comment} + netflow = recursive_query(16674) + netflow = {(nren_name, year): answer.strip('" ') for answer_id, nren_name, year, answer in netflow} + + for nren_name, year in tools.keys() | tools_comment.keys() | netflow.keys(): + if nren_name not in nren_dict: + logger.warning(f'{nren_name} unknown. 
Skipping.') + continue + + tool_descriptions = tools.get((nren_name, year), []) + comment = tools_comment.get((nren_name, year), "").replace("-", "") + if comment: + tool_descriptions.append(comment) + if "Other" in tool_descriptions: + tool_descriptions.remove("Other") + if "Other " in tool_descriptions: + tool_descriptions.remove("Other ") + + converted_descriptions = [] + + for description in tool_descriptions: + converted_descriptions.append(description_map.get(description, description)) + + new_entry = presentation_models.MonitoringTools( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + year=year, + tool_descriptions=converted_descriptions, + netflow_processing_description=netflow.get((nren_name, year), "") + ) + db.session.merge(new_entry) + db.session.commit() + + +def _cli(app): + with app.app_context(): + nren_dict = helpers.get_uppercase_nren_dict() + transfer_budget(nren_dict) + transfer_funding_sources(nren_dict) + transfer_staff_data(nren_dict) + transfer_nren_parent_org(nren_dict) + transfer_nren_sub_org(nren_dict) + transfer_charging_structure(nren_dict) + transfer_ec_projects(nren_dict) + transfer_policies(nren_dict) + transfer_institutions_urls(nren_dict) + + transfer_central_procurement(nren_dict) + transfer_service_management(nren_dict) + transfer_service_user_types(nren_dict) + transfer_standards(nren_dict) + transfer_crisis_exercises(nren_dict) + transfer_security_controls(nren_dict) + transfer_eosc_listings(nren_dict) + + transfer_commercial_connectivity(nren_dict) + transfer_commercial_charging_level(nren_dict) + + transfer_fibre_light(nren_dict) + transfer_network_map_urls(nren_dict) + transfer_monitoring_tools(nren_dict) + transfer_traffic_statistics(nren_dict) + transfer_siem_vendors(nren_dict) + transfer_certificate_providers(nren_dict) + transfer_weather_map(nren_dict) + transfer_pert_team(nren_dict) + transfer_alien_wave(nren_dict) + transfer_external_connections(nren_dict) + # traffic ratio was freeform text so we 
don't transfer it + transfer_network_function_virtualisation(nren_dict) + transfer_network_automation(nren_dict) + + +@click.command() +@click.option('--config', type=click.STRING, default='config.json') +def cli(config): + app_config = load(open(config, 'r')) + + app_config['SQLALCHEMY_BINDS'] = {survey_model.SURVEY_DB_BIND: app_config['SURVEY_DATABASE_URI']} + + app = compendium_v2._create_app_with_db(app_config) + _cli(app) + + +if __name__ == "__main__": + cli() diff --git a/setup.py b/setup.py index bdb8aa63d96fffc7932c4bf8355828c99c4ad9d8..efd59a2b1435c5ada86eed8c93c818eb646affeb 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='compendium-v2', - version="0.84", + version="0.85", author='GEANT', author_email='swd@geant.org', description='Flask and React project for displaying ' @@ -33,6 +33,8 @@ setup( 'conversion=compendium_v2.conversion.conversion:cli', 'dump_survey_model=compendium_v2.migrations.dump_survey_model:cli', 'legacy-survey-publisher=compendium_v2.publishers.legacy_publisher.survey_publisher_legacy:cli', + 'excel-survey-publisher=compendium_v2.publishers.survey_publisher_legacy_excel:cli', + 'db-publisher-2022=compendium_v2.publishers.survey_publisher_old_db_2022:cli', ] }, license='MIT', diff --git a/test/test_survey_publisher_legacy_excel.py b/test/test_survey_publisher_legacy_excel.py new file mode 100644 index 0000000000000000000000000000000000000000..57c936f21af553c6214a35defac709f61d0244b7 --- /dev/null +++ b/test/test_survey_publisher_legacy_excel.py @@ -0,0 +1,117 @@ +import os +import openpyxl + +from sqlalchemy import select, func +from compendium_v2 import db +from compendium_v2.db import presentation_models +from compendium_v2.publishers.survey_publisher_legacy_excel import _cli + + +def test_excel_publisher(app_with_survey_db, mocker, nren_services): + nren_services(app_with_survey_db) + EXCEL_FILE_ORGANISATION = openpyxl.load_workbook(os.path.join(os.path.dirname( + 
__file__), "data", "2021_Organisation_DataSeries.xlsx"), data_only=True, read_only=True) + EXCEL_FILE_USERS = openpyxl.load_workbook(os.path.join(os.path.dirname( + __file__), "data", "2022_Connected_Users_DataSeries.xlsx"), data_only=True, read_only=True) + EXCEL_FILE_NETWORKS = openpyxl.load_workbook(os.path.join(os.path.dirname( + __file__), "data", "2022_Networks_DataSeries.xlsx"), data_only=True, read_only=True) + EXCEL_FILE_NREN_SERVICES = openpyxl.load_workbook(os.path.join(os.path.dirname( + __file__), "data", "NREN-Services-prefills_2023_Recovered.xlsx"), data_only=True, read_only=True) + mocker.patch('compendium_v2.publishers.excel_parser.EXCEL_FILE_ORGANISATION', EXCEL_FILE_ORGANISATION) + mocker.patch('compendium_v2.publishers.excel_parser.EXCEL_FILE_USERS', EXCEL_FILE_USERS) + mocker.patch('compendium_v2.publishers.excel_parser.EXCEL_FILE_NETWORKS', EXCEL_FILE_NETWORKS) + mocker.patch('compendium_v2.publishers.excel_parser.EXCEL_FILE_NREN_SERVICES', EXCEL_FILE_NREN_SERVICES) + + with app_with_survey_db.app_context(): + nren_names = ['SURF', 'KIFU', 'University of Malta', 'ASNET-AM', 'SIKT', 'LAT', 'RASH', 'AzScienceNet', 'GRNET', + 'CSC', 'PSNC'] + db.session.add_all([presentation_models.NREN(name=nren_name, country='country') for nren_name in nren_names]) + db.session.commit() + + _cli(app_with_survey_db) + + with app_with_survey_db.app_context(): + budget_count = db.session.scalar(select(func.count(presentation_models.BudgetEntry.year))) + assert budget_count + funding_source_count = db.session.scalar(select(func.count(presentation_models.FundingSource.year))) + assert funding_source_count + charging_structure_count = db.session.scalar(select(func.count(presentation_models.ChargingStructure.year))) + assert charging_structure_count + staff_data = db.session.scalars(select(presentation_models.NrenStaff).order_by( + presentation_models.NrenStaff.year.asc()) + ).all() + + # data should only be saved for the NRENs we have saved in the database + 
staff_data_nrens = set([staff.nren.name for staff in staff_data]) + assert len(staff_data_nrens) == len(nren_names) - 1 # no UoM data + + kifu_data = [staff for staff in staff_data if staff.nren.name == 'KIFU'] + # check that the data is saved correctly for KIFU, it should be OK for the rest then.. + assert len(kifu_data) == 6 + + assert kifu_data[0].year == 2016 + assert kifu_data[0].permanent_fte == 100 + assert kifu_data[0].subcontracted_fte == 2 + assert kifu_data[0].technical_fte == 0 + assert kifu_data[0].non_technical_fte == 0 + + assert kifu_data[1].year == 2017 + assert kifu_data[1].permanent_fte == 80 + assert kifu_data[1].subcontracted_fte == 2 + assert kifu_data[1].technical_fte == 0 + assert kifu_data[1].non_technical_fte == 0 + + assert kifu_data[2].year == 2018 + assert kifu_data[2].permanent_fte == 80 + assert kifu_data[2].subcontracted_fte == 3 + assert kifu_data[2].technical_fte == 0 + assert kifu_data[2].non_technical_fte == 0 + + assert kifu_data[3].year == 2019 + assert kifu_data[3].permanent_fte == 148 + assert kifu_data[3].subcontracted_fte == 4 + assert kifu_data[3].technical_fte == 117 + assert kifu_data[3].non_technical_fte == 33 + + assert kifu_data[4].year == 2020 + assert kifu_data[4].permanent_fte == 190 + assert kifu_data[4].subcontracted_fte == 3 + assert kifu_data[4].technical_fte == 133 + assert kifu_data[4].non_technical_fte == 60 + + assert kifu_data[5].year == 2021 + assert kifu_data[5].permanent_fte == 178 + assert kifu_data[5].subcontracted_fte == 3 + assert kifu_data[5].technical_fte == 133 + assert kifu_data[5].non_technical_fte == 45 + + ecproject_data = db.session.scalars(select(presentation_models.ECProject)).all() + # test a couple of random entries + surf2017 = [x for x in ecproject_data if x.nren.name == 'SURF' and x.year == 2017] + assert len(surf2017) == 1 + assert surf2017[0].project == 'Asterics and Magic' + + asnetam2018 = [x for x in ecproject_data if x.nren.name == 'ASNET-AM' and x.year == 2018] + assert 
len(asnetam2018) == 1 + assert asnetam2018[0].project == 'EaPConnect' + + kifu2019 = [x for x in ecproject_data if x.nren.name == 'KIFU' and x.year == 2019] + assert len(kifu2019) == 4 + assert kifu2019[3].project == 'SuperHeroes for Science' + + parent_data = db.session.scalars(select(presentation_models.ParentOrganization)).all() + # test a random entry + asnet2021 = [x for x in parent_data if x.nren.name == 'ASNET-AM' and x.year == 2021] + assert len(asnet2021) == 1 + assert asnet2021[0].organization\ + == 'Institute for Informatics and Automation Problems of the National Academy of Sciences of Armenia' + + service_data = db.session.scalars(select(presentation_models.Service)).all() + assert len(service_data) > 70 + + nren_service_data = db.session.scalars(select(presentation_models.NRENService)).all() + # test a random entry + sikt2022 = [x for x in nren_service_data + if x.nren.name == 'SIKT' and x.year == 2022 and x.service.name == 'Journal access'] + assert len(sikt2022) == 1 + assert sikt2022[0].additional_information.startswith("Sikt negotiates license a") diff --git a/test/test_survey_publisher_old_db_2022.py b/test/test_survey_publisher_old_db_2022.py new file mode 100644 index 0000000000000000000000000000000000000000..ed5e2e5f5fb5c9c1bcff9484e3b49f9e56bc8d3d --- /dev/null +++ b/test/test_survey_publisher_old_db_2022.py @@ -0,0 +1,309 @@ +from sqlalchemy import select + +from compendium_v2.db import db, presentation_model_enums, presentation_models +from compendium_v2.publishers.survey_publisher_old_db_2022 import _cli, FundingSource, \ + StaffQuestion, OrgQuestion, ChargingStructure, ECQuestion + + +def org_data(question): + """ + This function defines test data for the org questions. + + The following data is defined for the appropriate questions as modeled in Compendium: + + nren1,CYNET-CSIRT,cert team + nren1,DFN-CERT,CERT + nren2,Educampus Services,MIS shared services for third level. 
+ nren3,VilniusTech,Technical centre + nren3,KU,Technical centre + nren3,VDU,Technical centre + nren3,VU,Technical centre + nren3,org_data +org_data +org_data +org_data +org_dataKTU,"NOC, administrative authority" + """ + + if question == OrgQuestion.PARENT_ORG_NAME: + return [ + ('nren1', 'Org1'), + ('nren3', 'Org3'), + ] + + if str(question.name).endswith('1_NAME'): + return [ + ('nren1', 'CYNET-CSIRT'), + ('nren2', 'Educampus Services'), + ('nren3', 'VilniusTech'), + ] + + if str(question.name).endswith('2_NAME'): + return [ + ('nren1', 'DFN-CERT'), + ('nren3', 'KU'), + ] + + if str(question.name).endswith('3_NAME'): + return [ + ('nren3', 'VDU'), + ] + + if str(question.name).endswith('4_NAME'): + return [ + ('nren3', 'VU'), + ] + + if str(question.name).endswith('5_NAME'): + return [ + ('nren3', 'KTU'), + ] + + if str(question.name).endswith('1_CHOICE'): + return [ + ('nren1', 'other'), + ('nren2', 'other'), + ('nren3', 'Technical centre'), + ] + + if str(question.name).endswith('2_CHOICE'): + return [ + ('nren1', 'other'), + ('nren3', 'Technical centre'), + ] + + if str(question.name).endswith('3_CHOICE'): + return [ + ('nren3', 'Technical centre'), + ] + + if str(question.name).endswith('4_CHOICE'): + return [ + ('nren3', 'Technical centre'), + ] + + if str(question.name).endswith('5_CHOICE'): + return [ + ('nren3', 'other'), + ] + + if str(question.name).endswith('1_ROLE'): + return [ + ('nren1', 'cert team'), + ('nren2', 'MIS shared services for third level.') + ] + + if str(question.name).endswith('2_ROLE'): + return [ + ('nren1', 'CERT'), + ] + + if str(question.name).endswith('3_ROLE'): + return [] + + if str(question.name).endswith('4_ROLE'): + return [] + + if str(question.name).endswith('5_ROLE'): + return [ + ('nren3', 'NOC, administrative authority') + ] + + +def test_publisher(app_with_survey_db, mocker): + global org_data + + def get_rows_as_tuples(*args, **kwargs): + return [ + ('nren1', '100'), + ('nren2', '200'), + ('nren3', '300'), + 
('nren4', 'abcd') + ] + + def funding_source_data(): + yield FundingSource.CLIENT_INSTITUTIONS, [ + ('nren1', '10'), + ('nren2', '80'), + ('nren3', '30'), + ] + yield FundingSource.EUROPEAN_FUNDING, [ + ('nren1', '50'), + ('nren2', '20'), + ('nren3', '30'), + ] + yield FundingSource.OTHER, [ + ('nren1', '40'), + ('nren2', 'abc'), + ('nren3', '30'), + ] + + def question_data(question): + if question == StaffQuestion.NON_TECHNICAL_FTE: + return [ + ('nren1', '10'), + ('nren2', '80'), + ('nren3', '30'), + ] + + if question == StaffQuestion.TECHNICAL_FTE: + return [ + ('nren1', '50'), + ('nren2', '20'), + ('nren3', '30'), + ] + + if question == StaffQuestion.PERMANENT_FTE: + return [ + ('nren1', '60'), + ('nren2', 'abc'), + ('nren3', '30'), + ] + + if question == StaffQuestion.SUBCONTRACTED_FTE: + return [ + ('nren1', '0'), + ('nren2', '0'), + ('nren3', '0'), + ] + if question in OrgQuestion: + return org_data(question) + + if question == ChargingStructure.charging_structure: + return [ + ('nren1', 'We do not charge them directly'), + ('nren2', 'We charge a usage-based fee'), + ('nren3', 'Other'), + ] + + if question in ECQuestion: + return [ + ('nren1', '[""'), + ('nren2', '["project1", "project2"]'), + ('nren3', '["project3"]'), + ] + + def question_id_data(question_id, year): + if question_id in [ + 16469, 16064, 15720, 15305, 14910, 16471, 16066, 15722, 15307, 14912, 16473, 16378, + 16475, 16068, 15724, 15309, 14914, 16477, 16070, 15726, 15311, 14916, 16479, 16072, 15728, 15575, + 16481, 16074, 15730, 15577, 16761]: + return [ + ('nren1', f'www.nren.com/somepolicy{year}.pdf'), + ('nren2', 'policyemail@nren.com'), + ('nren3', 'n.a. 
online'), + ] + + def institutions_urls_data(question_id): + if question_id == 16507: + return [ + (87483, 'ANA', 2013, "http://www.rash.al/index.php/network/points-of-presence-pop"), + (163286, 'ANA', 2014, "http://www.rash.al/index.php/network/points-of-presence-pop"), + ] + else: + return [] + + mocker.patch('compendium_v2.publishers.survey_publisher_old_db_2022.query_budget', get_rows_as_tuples) + mocker.patch('compendium_v2.publishers.survey_publisher_old_db_2022.query_funding_sources', funding_source_data) + mocker.patch('compendium_v2.publishers.survey_publisher_old_db_2022.query_question', question_data) + mocker.patch('compendium_v2.publishers.survey_publisher_old_db_2022.query_question_id', question_id_data) + mocker.patch('compendium_v2.publishers.survey_publisher_old_db_2022.recursive_query', institutions_urls_data) + + nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'University of Malta', 'ASNET-AM', + 'SIKT', 'LAT', 'RASH', 'AzScienceNet', 'GRNET', 'CSC', 'PSNC'] + with app_with_survey_db.app_context(): + db.session.add_all([presentation_models.NREN(name=nren_name, country='country') for nren_name in nren_names]) + db.session.commit() + + _cli(app_with_survey_db) + + with app_with_survey_db.app_context(): + budgets = db.session.scalars( + select(presentation_models.BudgetEntry).order_by(presentation_models.BudgetEntry.nren_id.asc()) + ).all() + assert len(budgets) == 3 + assert budgets[0].nren.name.lower() == 'nren1' + assert budgets[0].budget == 100 + + funding_sources = db.session.scalars( + select(presentation_models.FundingSource).order_by(presentation_models.FundingSource.nren_id.asc()) + ).all() + assert len(funding_sources) == 2 + assert funding_sources[0].nren.name.lower() == 'nren1' + assert funding_sources[0].client_institutions == 10 + assert funding_sources[0].european_funding == 50 + assert funding_sources[0].other == 40 + + assert funding_sources[1].nren.name.lower() == 'nren2' + assert 
funding_sources[1].client_institutions == 80 + assert funding_sources[1].european_funding == 20 + assert funding_sources[1].other == 0 + + staff_data = db.session.scalars( + select(presentation_models.NrenStaff).order_by(presentation_models.NrenStaff.nren_id.asc()) + ).all() + + assert len(staff_data) == 1 + assert staff_data[0].nren.name.lower() == 'nren1' + assert staff_data[0].non_technical_fte == 10 + assert staff_data[0].technical_fte == 50 + assert staff_data[0].permanent_fte == 60 + assert staff_data[0].subcontracted_fte == 0 + + _org_data = db.session.scalars( + select(presentation_models.ParentOrganization).order_by( + presentation_models.ParentOrganization.nren_id.asc()) + ).all() + + assert len(_org_data) == 2 + assert _org_data[0].nren.name.lower() == 'nren1' + assert _org_data[0].organization == 'Org1' + + assert _org_data[1].nren.name.lower() == 'nren3' + assert _org_data[1].organization == 'Org3' + + charging_structures = db.session.scalars( + select(presentation_models.ChargingStructure).order_by(presentation_models.ChargingStructure.nren_id.asc()) + ).all() + assert len(charging_structures) == 3 + assert charging_structures[0].nren.name.lower() == 'nren1' + assert charging_structures[0].fee_type == presentation_model_enums.FeeType.no_charge + assert charging_structures[1].nren.name.lower() == 'nren2' + assert charging_structures[1].fee_type == presentation_model_enums.FeeType.usage_based_fee + assert charging_structures[2].nren.name.lower() == 'nren3' + assert charging_structures[2].fee_type == presentation_model_enums.FeeType.other + + _ec_data = db.session.scalars( + select(presentation_models.ECProject).order_by(presentation_models.ECProject.nren_id.asc()) + ).all() + + assert len(_ec_data) == 3 + assert _ec_data[0].nren.name.lower() == 'nren2' + assert _ec_data[0].project == 'project1' + + assert _ec_data[1].nren.name.lower() == 'nren2' + assert _ec_data[1].project == 'project2' + + assert _ec_data[2].nren.name.lower() == 'nren3' + assert 
_ec_data[2].project == 'project3' + + policy_data = db.session.scalars( + select(presentation_models.Policy).order_by(presentation_models.Policy.nren_id.asc()) + ).all() + policy_data_2020 = [p for p in policy_data if p.year == 2020] + policy_data_2022 = [p for p in policy_data if p.year == 2022] + assert len(policy_data_2020) == 2 + assert len(policy_data_2022) == 2 + assert policy_data_2020[0].strategic_plan == 'www.nren.com/somepolicy2020.pdf' + assert policy_data_2020[1].strategic_plan == 'policyemail@nren.com' + + _institution_urls_data = db.session.scalars( + select(presentation_models.InstitutionURLs).order_by(presentation_models.InstitutionURLs.nren_id.asc()) + ).all() + assert len(_institution_urls_data) == 2 + assert _institution_urls_data[0].nren.name.lower() == 'rash' + assert _institution_urls_data[0].year == 2013 + assert _institution_urls_data[0].urls == ["http://www.rash.al/index.php/network/points-of-presence-pop"] + assert _institution_urls_data[1].nren.name.lower() == 'rash' + assert _institution_urls_data[1].year == 2014 + assert _institution_urls_data[1].urls == ["http://www.rash.al/index.php/network/points-of-presence-pop"]