-
Bjarke Madsen authoredBjarke Madsen authored
survey_publisher_legacy_excel.py 20.25 KiB
"""
survey_publisher_v1
=========================
This module loads the survey data from before 2022 from a legacy Excel files.
Missing info is filled in from the survey db for some questions.
Registered as click cli command when installing compendium-v2.
"""
from __future__ import annotations
import json
import logging
import math
import click
from sqlalchemy import select, delete
import compendium_v2
from compendium_v2.config import load
from compendium_v2.db import db, presentation_models
from compendium_v2.environment import setup_logging
from compendium_v2.publishers import helpers, excel_parser
from compendium_v2.resources import get_resource_file_path
from compendium_v2.survey_db import model as survey_model
setup_logging()
logger = logging.getLogger('survey-publisher-legacy-excel')
def db_budget_migration(nren_dict):
# move data from Survey DB budget table
data = db.session.scalars(select(survey_model.Nrens))
db.session.execute(delete(presentation_models.BudgetEntry).where(
presentation_models.BudgetEntry.year < 2022))
for nren in data:
for budget in nren.budgets:
abbrev = nren.abbreviation.upper()
year = budget.year
if float(budget.budget) > 400:
logger.warning(f'Incorrect Data: {abbrev} has budget set >400M EUR for {year}. ({budget.budget})')
continue
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
budget_entry = presentation_models.BudgetEntry(
nren=nren_dict[abbrev],
nren_id=nren_dict[abbrev].id,
budget=float(budget.budget),
year=year
)
db.session.merge(budget_entry)
# Import the data from excel sheet to database
exceldata = excel_parser.fetch_budget_excel_data()
for abbrev, budget, year in exceldata:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
if budget > 400:
logger.warning(f'{nren} has budget set to >400M EUR for {year}. ({budget})')
continue
budget_entry = presentation_models.BudgetEntry(
nren=nren_dict[abbrev],
nren_id=nren_dict[abbrev].id,
budget=budget,
year=year
)
db.session.merge(budget_entry)
db.session.commit()
def db_funding_migration(nren_dict):
# Import the data to database
data = excel_parser.fetch_funding_excel_data()
db.session.execute(delete(presentation_models.FundingSource).where(
presentation_models.FundingSource.year < 2022))
for (abbrev, year, client_institution,
european_funding,
gov_public_bodies,
commercial, other) in data:
_data = [client_institution, european_funding, gov_public_bodies, commercial, other]
total = sum(_data)
if not math.isclose(total, 100, abs_tol=0.01):
logger.warning(f'{abbrev} funding sources for {year} do not sum to 100% ({total})')
continue
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
budget_entry = presentation_models.FundingSource(
nren=nren_dict[abbrev],
nren_id=nren_dict[abbrev].id,
year=year,
client_institutions=client_institution,
european_funding=european_funding,
gov_public_bodies=gov_public_bodies,
commercial=commercial,
other=other)
db.session.merge(budget_entry)
db.session.commit()
def db_charging_structure_migration(nren_dict):
# Import the data to database
data = excel_parser.fetch_charging_structure_excel_data()
db.session.execute(delete(presentation_models.ChargingStructure).where(
presentation_models.ChargingStructure.year < 2022))
for (abbrev, year, charging_structure) in data:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
charging_structure_entry = presentation_models.ChargingStructure(
nren=nren_dict[abbrev],
nren_id=nren_dict[abbrev].id,
year=year,
fee_type=charging_structure
)
db.session.merge(charging_structure_entry)
db.session.commit()
def db_staffing_migration(nren_dict):
staff_data = excel_parser.fetch_staffing_excel_data()
db.session.execute(delete(presentation_models.NrenStaff).where(
presentation_models.NrenStaff.year < 2022))
nren_staff_map = {}
for (abbrev, year, permanent_fte, subcontracted_fte) in staff_data:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping staff data.')
continue
nren = nren_dict[abbrev]
nren_staff_map[(nren.id, year)] = presentation_models.NrenStaff(
nren=nren,
nren_id=nren.id,
year=year,
permanent_fte=permanent_fte,
subcontracted_fte=subcontracted_fte,
technical_fte=0,
non_technical_fte=0
)
function_data = excel_parser.fetch_staff_function_excel_data()
for (abbrev, year, technical_fte, non_technical_fte) in function_data:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping staff function data.')
continue
nren = nren_dict[abbrev]
if (nren.id, year) in nren_staff_map:
nren_staff_map[(nren.id, year)].technical_fte = technical_fte
nren_staff_map[(nren.id, year)].non_technical_fte = non_technical_fte
else:
nren_staff_map[(nren.id, year)] = presentation_models.NrenStaff(
nren=nren,
nren_id=nren.id,
year=year,
permanent_fte=0,
subcontracted_fte=0,
technical_fte=technical_fte,
non_technical_fte=non_technical_fte
)
for nren_staff_model in nren_staff_map.values():
employed = nren_staff_model.permanent_fte + nren_staff_model.subcontracted_fte
technical = nren_staff_model.technical_fte + nren_staff_model.non_technical_fte
if not math.isclose(employed, technical, abs_tol=0.01) and employed != 0 and technical != 0:
logger.warning(f'{nren_staff_model.nren.name} in {nren_staff_model.year}:'
f' FTE do not equal across employed/technical categories ({employed} != {technical})')
db.session.merge(nren_staff_model)
db.session.commit()
def db_ecprojects_migration(nren_dict):
ecproject_data = excel_parser.fetch_ecproject_excel_data()
for (abbrev, year, project) in ecproject_data:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
ecproject_entry = presentation_models.ECProject(nren=nren, nren_id=nren.id, year=year, project=project)
db.session.merge(ecproject_entry)
db.session.commit()
def db_organizations_migration(nren_dict):
organization_data = excel_parser.fetch_organization_excel_data()
for (abbrev, year, org) in organization_data:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
org_entry = presentation_models.ParentOrganization(nren=nren, nren_id=nren.id, year=year, organization=org)
db.session.merge(org_entry)
db.session.commit()
def db_traffic_volume_migration(nren_dict):
traffic_data = excel_parser.fetch_traffic_excel_data()
for (abbrev, year, from_external, to_external, from_customers, to_customers) in traffic_data:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
traffic_entry = presentation_models.TrafficVolume(
nren=nren,
nren_id=nren.id,
year=year,
from_customers=from_customers,
to_customers=to_customers,
from_external=from_external,
to_external=to_external
)
db.session.merge(traffic_entry)
db.session.commit()
def db_services_migration():
with open(get_resource_file_path('nren_services.json')) as f:
services = json.load(f)
for name_key, record in services.items():
service = presentation_models.Service(
name_key=name_key,
name=record['name'],
category=record['category'],
description=record['description'],
)
db.session.merge(service)
db.session.commit()
def db_nren_services_migration(nren_dict):
services = [s for s in db.session.scalars(select(presentation_models.Service))]
for service_info in excel_parser.fetch_nren_services_excel_data():
[service] = [s for s in services if s.name_key == service_info['service_name_key']]
abbrev = service_info['nren_name']
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
nren_service = presentation_models.NRENService(
nren=nren,
nren_id=nren.id,
year=service_info['year'],
service=service,
service_key=service.name_key,
product_name=service_info['product_name'],
additional_information=service_info['additional_information'],
official_description=service_info['official_description']
)
db.session.merge(nren_service)
db.session.commit()
def db_connected_proportion_migration(nren_dict):
remit = excel_parser.fetch_remit_excel_data()
nr_connected = excel_parser.fetch_nr_connected_excel_data()
market_share = excel_parser.fetch_market_share_excel_data()
users_served = excel_parser.fetch_users_served_excel_data()
all_entry_keys = set()
all_entry_keys.update(remit.keys())
all_entry_keys.update(nr_connected.keys())
all_entry_keys.update(market_share.keys())
all_entry_keys.update(users_served.keys())
for key in all_entry_keys:
(abbrev, year, user_category) = key
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
connected_proportion = presentation_models.ConnectedProportion(
nren=nren,
nren_id=nren.id,
year=year,
user_category=user_category,
coverage=remit.get(key),
number_connected=nr_connected.get(key),
market_share=market_share.get(key),
users_served=users_served.get(key)
)
db.session.merge(connected_proportion)
db.session.commit()
def db_connectivity_level_migration(nren_dict):
typical_speed = excel_parser.fetch_typical_speed_excel_data()
highest_speed = excel_parser.fetch_highest_speed_excel_data()
highest_speed_proportion = excel_parser.fetch_highest_speed_proportion_excel_data()
all_entry_keys = set()
all_entry_keys.update(typical_speed.keys())
all_entry_keys.update(highest_speed.keys())
all_entry_keys.update(highest_speed_proportion.keys())
for key in all_entry_keys:
(abbrev, year, user_category) = key
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
connected_proportion = presentation_models.ConnectivityLevel(
nren=nren,
nren_id=nren.id,
year=year,
user_category=user_category,
typical_speed=typical_speed.get(key),
highest_speed=highest_speed.get(key),
highest_speed_proportion=highest_speed_proportion.get(key)
)
db.session.merge(connected_proportion)
db.session.commit()
def db_connection_carrier_migration(nren_dict):
carriers = excel_parser.fetch_carriers_excel_data()
for key, carry_mechanism in carriers.items():
(abbrev, year, user_category) = key
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
connection_carrier = presentation_models.ConnectionCarrier(
nren=nren,
nren_id=nren.id,
year=year,
user_category=user_category,
carry_mechanism=carry_mechanism
)
db.session.merge(connection_carrier)
db.session.commit()
def db_connectivity_growth_migration(nren_dict):
growth = excel_parser.fetch_growth_excel_data()
for key, growth_percent in growth.items():
(abbrev, year, user_category) = key
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
connectivity_growth = presentation_models.ConnectivityGrowth(
nren=nren,
nren_id=nren.id,
year=year,
user_category=user_category,
growth=growth_percent
)
db.session.merge(connectivity_growth)
db.session.commit()
def db_connectivity_load_migration(nren_dict):
average = excel_parser.fetch_average_traffic_excel_data()
peak = excel_parser.fetch_peak_traffic_excel_data()
all_entry_keys = set()
all_entry_keys.update(average.keys())
all_entry_keys.update(peak.keys())
for key in all_entry_keys:
(abbrev, year, user_category) = key
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
connectivity_load = presentation_models.ConnectivityLoad(
nren=nren,
nren_id=nren.id,
year=year,
user_category=user_category,
average_load_from_institutions=average.get(key, (None, None))[0],
average_load_to_institutions=average.get(key, (None, None))[1],
peak_load_from_institutions=peak.get(key, (None, None))[0],
peak_load_to_institutions=peak.get(key, (None, None))[1]
)
db.session.merge(connectivity_load)
db.session.commit()
def db_remote_campuses_migration(nren_dict):
campuses = excel_parser.fetch_remote_campuses_excel_data()
for (abbrev, year, connectivity, country, connected_to_r_e) in campuses:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
connections = []
if country:
connections.append({'country': country, 'local_r_and_e_connection': connected_to_r_e})
nren = nren_dict[abbrev]
new_entry = presentation_models.RemoteCampuses(
nren=nren,
nren_id=nren.id,
year=year,
remote_campus_connectivity=connectivity,
connections=connections
)
db.session.merge(new_entry)
db.session.commit()
def db_dark_fibre_lease_migration(nren_dict):
data_rows = excel_parser.fetch_dark_fibre_iru_excel_data()
iru_duration = excel_parser.fetch_iru_duration_excel_data()
for (abbrev, year, iru, length_in_country, length_out_country) in data_rows:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
new_entry = presentation_models.DarkFibreLease(
nren=nren,
nren_id=nren.id,
year=year,
iru_or_lease=iru,
fibre_length_in_country=length_in_country,
fibre_length_outside_country=length_out_country,
iru_duration=iru_duration.get((abbrev, year))
)
db.session.merge(new_entry)
db.session.commit()
def db_dark_fibre_installed_migration(nren_dict):
data_rows = excel_parser.fetch_dark_fibre_installed_excel_data()
for (abbrev, year, installed, length) in data_rows:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
new_entry = presentation_models.DarkFibreInstalled(
nren=nren,
nren_id=nren.id,
year=year,
installed=installed,
fibre_length_in_country=length
)
db.session.merge(new_entry)
db.session.commit()
def db_passive_monitoring_migration(nren_dict):
data_rows = excel_parser.fetch_passive_monitoring_excel_data()
for (abbrev, year, monitoring, method) in data_rows:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
new_entry = presentation_models.PassiveMonitoring(
nren=nren,
nren_id=nren.id,
year=year,
monitoring=monitoring,
method=method
)
db.session.merge(new_entry)
db.session.commit()
def db_capacity_migration(nren_dict):
largest_data_rows = excel_parser.fetch_largest_link_capacity_excel_data()
typical_data_rows = excel_parser.fetch_typical_backbone_capacity_excel_data()
for abbrev, year in largest_data_rows.keys() | typical_data_rows.keys():
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
new_entry = presentation_models.Capacity(
nren=nren,
nren_id=nren.id,
year=year,
largest_link_capacity=largest_data_rows.get((abbrev, year)),
typical_backbone_capacity=typical_data_rows.get((abbrev, year))
)
db.session.merge(new_entry)
db.session.commit()
def db_non_r_e_peers_migration(nren_dict):
data_rows = excel_parser.fetch_non_r_e_peers_excel_data()
for (abbrev, year, nr_of_non_r_and_e_peers) in data_rows:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
new_entry = presentation_models.NonREPeers(
nren=nren,
nren_id=nren.id,
year=year,
nr_of_non_r_and_e_peers=nr_of_non_r_and_e_peers
)
db.session.merge(new_entry)
db.session.commit()
def db_ops_automation_migration(nren_dict):
data_rows = excel_parser.fetch_ops_automation_excel_data()
for (abbrev, year, automation, specifics) in data_rows:
if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.')
continue
nren = nren_dict[abbrev]
new_entry = presentation_models.OpsAutomation(
nren=nren,
nren_id=nren.id,
year=year,
ops_automation=automation,
ops_automation_specifics=specifics
)
db.session.merge(new_entry)
db.session.commit()
def _cli(app):
with app.app_context():
nren_dict = helpers.get_uppercase_nren_dict()
db_budget_migration(nren_dict)
db_funding_migration(nren_dict)
db_charging_structure_migration(nren_dict)
db_staffing_migration(nren_dict)
db_ecprojects_migration(nren_dict)
db_organizations_migration(nren_dict)
db_traffic_volume_migration(nren_dict)
db_services_migration()
db_nren_services_migration(nren_dict)
db_connected_proportion_migration(nren_dict)
db_connectivity_level_migration(nren_dict)
db_connection_carrier_migration(nren_dict)
db_connectivity_growth_migration(nren_dict)
db_connectivity_load_migration(nren_dict)
db_remote_campuses_migration(nren_dict)
db_dark_fibre_lease_migration(nren_dict)
db_dark_fibre_installed_migration(nren_dict)
db_passive_monitoring_migration(nren_dict)
db_capacity_migration(nren_dict)
db_non_r_e_peers_migration(nren_dict)
db_ops_automation_migration(nren_dict)
@click.command()
@click.option('--config', type=click.STRING, default='config.json')
def cli(config):
app_config = load(open(config, 'r'))
app_config['SQLALCHEMY_BINDS'] = {survey_model.SURVEY_DB_BIND: app_config['SURVEY_DATABASE_URI']}
app = compendium_v2._create_app_with_db(app_config)
print("survey-publisher-v1 starting")
_cli(app)
if __name__ == "__main__":
cli()