Skip to content
Snippets Groups Projects
Commit ba3d45f7 authored by Remco Tukker
Browse files

excel publisher for connected users questions

parent f21f4f8f
Branches
Tags
No related merge requests found
......@@ -475,7 +475,6 @@ class NetworkAutomation(db.Model):
class Service(db.Model):
__tablename__ = 'service'
name_key: Mapped[str128_pk]
name: Mapped[str128]
category: Mapped[ServiceCategory]
......@@ -484,7 +483,6 @@ class Service(db.Model):
class NRENService(db.Model):
__tablename__ = 'nren_service'
nren_id: Mapped[int_pk_fkNREN]
nren: Mapped[NREN] = relationship(lazy='joined')
year: Mapped[int_pk]
......
......@@ -3,7 +3,7 @@ import logging
import openpyxl
from compendium_v2.conversion import mapping
from compendium_v2.db.presentation_models import FeeType
from compendium_v2.db.presentation_model_enums import CarryMechanism, ConnectivityCoverage, UserCategory, FeeType
from compendium_v2.environment import setup_logging
from compendium_v2.resources import get_resource_file_path
......@@ -12,6 +12,7 @@ setup_logging()
logger = logging.getLogger(__name__)
EXCEL_FILE_ORGANISATION = get_resource_file_path("2021_Organisation_DataSeries.xlsx")
EXCEL_FILE_USERS = get_resource_file_path("2022_Connected_Users_DataSeries.xlsx")
EXCEL_FILE_NETWORKS = get_resource_file_path("2022_Networks_DataSeries.xlsx")
EXCEL_FILE_NREN_SERVICES = get_resource_file_path("NREN-Services-prefills_2023_Recovered.xlsx")
......@@ -440,3 +441,387 @@ def fetch_nren_services_excel_data():
'additional_information': additional_information.strip(),
'official_description': '',
}
def get_category(excel_cat):
    """Translate a column header from the Excel sheets into a ``UserCategory``.

    Matching is done on case-insensitive substrings, checked in a fixed order
    (the first match wins), because the header wording differs between sheets.

    Args:
        excel_cat: the raw header cell value; may be ``None`` or empty.

    Returns:
        The matching ``UserCategory`` member, or ``None`` when the cell is
        empty or the label is not recognised (a warning is logged in the
        latter case).
    """
    if not excel_cat:
        return None

    # Lower-case once instead of once per test; str() guards against header
    # cells that hold a non-text value (e.g. a number), which would otherwise
    # raise AttributeError on .lower().
    label = str(excel_cat).lower()

    if "universit" in label:
        return UserCategory.universities
    if "research ins" in label:
        return UserCategory.institutes
    # "fe" is only accepted as the complete label, not as a substring.
    if "further" in label or label == "fe":
        return UserCategory.further_education
    if "inter" in label:
        return UserCategory.iros
    if "cultural" in label or "librar" in label:
        return UserCategory.cultural
    if "hospital" in label:
        return UserCategory.hospitals
    if "primary" in label:
        return UserCategory.primary_schools
    if "secondary" in label:
        return UserCategory.secondary_schools
    if "govern" in label:
        return UserCategory.government
    if "profit" in label:
        return UserCategory.for_profit_orgs

    logger.warning(f'unknown user category: {excel_cat}')
    return None
def fetch_remit_excel_data():
    """Read the connectivity remit per NREN, year and user category.

    Parses the "Connectivity Remit" sheet of the connected-users workbook.
    Category headers are on row index 7 (0-based) and NREN rows are indices
    8-50; each survey year occupies its own block of columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory)`` mapped to a
        ``ConnectivityCoverage`` member.  Cells whose header or answer is
        empty or not recognised are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Connectivity Remit"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    def get_remit(excel_remit):
        """Map a free-text remit answer to ``ConnectivityCoverage`` (or None)."""
        if not excel_remit:
            return None
        answer = excel_remit.lower()
        if "including transit" in answer:
            return ConnectivityCoverage.yes_incl_other
        if "national nren" in answer:
            return ConnectivityCoverage.yes_national_nren
        if "some circ" in answer:
            return ConnectivityCoverage.sometimes
        if "policy reas" in answer:
            return ConnectivityCoverage.no_policy
        if "financial" in answer:
            return ConnectivityCoverage.no_financial
        if "other reason" in answer:
            return ConnectivityCoverage.no_other
        if "unsure" in answer:
            return ConnectivityCoverage.unsure
        logger.warning(f'unknown remit: {excel_remit}')
        return None

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; answers sit in every second
        # column starting two columns to the right of it.
        for i in range(8, 51):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for col in range(start_column + 2, start_column + 21, 2):
                c = col
                # NOTE(review): the 2021 block is read two columns further
                # right once past column 30 - presumably an extra column pair
                # in that year's layout; confirm against the workbook.
                if year == 2021 and col > 30:
                    c += 2
                category = get_category(rows[7][c].value)
                remit = get_remit(rows[i][c].value)
                if category and remit:
                    result[(nren_name, year, category)] = remit

    create_points_for_year(2019, 72)
    create_points_for_year(2020, 50)
    create_points_for_year(2021, 26)
    create_points_for_year(2022, 3)
    return result
def fetch_nr_connected_excel_data():
    """Read the number of connected institutions per NREN, year and category.

    Parses the "Connected Institutions" sheet of the connected-users workbook.
    Category headers are on row index 4 (0-based) and NREN rows are indices
    5-47; each survey year occupies its own block of columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> int``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Connected Institutions"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(5, 48):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[4][c].value)
                raw_value = rows[i][c].value
                nr_connected = int(raw_value) if raw_value else None
                if category and nr_connected:
                    result[(nren_name, year, category)] = nr_connected

    create_points_for_year(2019, 39)
    create_points_for_year(2020, 27)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_market_share_excel_data():
    """Read the market share per NREN, year and user category.

    Parses the "Table Market Share" sheet of the connected-users workbook.
    Category headers are on row index 7 (0-based) and NREN rows are indices
    8-50; each survey year (2017-2022) occupies its own block of columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> float``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Table Market Share"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(8, 51):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[7][c].value)
                raw_value = rows[i][c].value
                percentage_connected = float(raw_value) if raw_value else None
                if category and percentage_connected:
                    result[(nren_name, year, category)] = percentage_connected

    create_points_for_year(2017, 64)
    create_points_for_year(2018, 52)
    create_points_for_year(2019, 40)
    create_points_for_year(2020, 28)
    create_points_for_year(2021, 16)
    create_points_for_year(2022, 3)
    return result
def fetch_users_served_excel_data():
    """Read the number of users served per NREN, year and user category.

    Parses the "Users" sheet of the connected-users workbook.  Category
    headers are on row index 3 (0-based) and NREN rows are indices 4-46; each
    survey year occupies its own block of columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> int``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Users"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(4, 47):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[3][c].value)
                raw_value = rows[i][c].value
                users_connected = int(raw_value) if raw_value else None
                if category and users_connected:
                    result[(nren_name, year, category)] = users_connected

    create_points_for_year(2019, 40)
    create_points_for_year(2020, 28)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_typical_speed_excel_data():
    """Read the typical link capacity per NREN, year and user category.

    Parses the "Table _Typical IP Link capacity" sheet of the connected-users
    workbook.  Category headers are on row index 32 (0-based) and NREN rows
    are indices 33-75; each survey year (2017-2022) occupies its own block of
    columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> int``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Table _Typical IP Link capacity"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(33, 76):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[32][c].value)
                raw_value = rows[i][c].value
                typical_speed = int(raw_value) if raw_value else None
                if category and typical_speed:
                    result[(nren_name, year, category)] = typical_speed

    # NOTE(review): the other year blocks are 12 columns apart, but 2017 sits
    # 25 columns after 2018 - confirm offset 75 against the workbook.
    create_points_for_year(2017, 75)
    create_points_for_year(2018, 50)
    create_points_for_year(2019, 38)
    create_points_for_year(2020, 26)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_highest_speed_excel_data():
    """Read the highest link capacity per NREN, year and user category.

    Parses the "Table _Highest IP Link capacity" sheet of the connected-users
    workbook.  Category headers are on row index 32 (0-based) and NREN rows
    are indices 33-75; each survey year (2017-2022) occupies its own block of
    columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> int``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Table _Highest IP Link capacity"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(33, 76):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[32][c].value)
                raw_value = rows[i][c].value
                highest_speed = int(raw_value) if raw_value else None
                if category and highest_speed:
                    result[(nren_name, year, category)] = highest_speed

    create_points_for_year(2017, 64)
    create_points_for_year(2018, 51)
    create_points_for_year(2019, 38)
    create_points_for_year(2020, 26)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_highest_speed_proportion_excel_data():
    """Read the highest-capacity connection share per NREN, year and category.

    Parses the "Aver High cap conn Share" sheet of the connected-users
    workbook.  Category headers are on row index 4 (0-based) and NREN rows are
    indices 5-47; each survey year (2020-2022) occupies its own block of
    columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> float``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Aver High cap conn Share"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(5, 48):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[4][c].value)
                raw_value = rows[i][c].value
                proportion = float(raw_value) if raw_value else None
                if category and proportion:
                    result[(nren_name, year, category)] = proportion

    create_points_for_year(2020, 27)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_carriers_excel_data():
    """Read the traffic carry mechanism per NREN, year and user category.

    Parses the "Traffic carriers" sheet of the connected-users workbook.
    Category headers are on row index 2 (0-based) and NREN rows are indices
    3-45; each survey year occupies its own block of columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory)`` mapped to a
        ``CarryMechanism`` member.  Cells whose header or answer is empty or
        not recognised are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Traffic carriers"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    def get_carrier(excel_carrier):
        """Map a free-text carrier answer to ``CarryMechanism`` (or None)."""
        if not excel_carrier:
            return None
        answer = excel_carrier.lower()
        # Order matters: the first matching fragment wins.
        if "comme" in answer:
            return CarryMechanism.commercial_provider_backbone
        if "man" in answer:
            return CarryMechanism.man
        if "local loop" in answer:
            return CarryMechanism.nren_local_loops
        if "other" in answer:
            return CarryMechanism.other
        if "regional" in answer:
            return CarryMechanism.regional_nren_backbone
        logger.warning(f'unknown carrier: {excel_carrier}')
        return None

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten answer columns follow it.
        for i in range(3, 46):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[2][c].value)
                carrier = get_carrier(rows[i][c].value)
                if category and carrier:
                    result[(nren_name, year, category)] = carrier

    create_points_for_year(2019, 40)
    create_points_for_year(2020, 27)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_growth_excel_data():
    """Read the traffic growth figure per NREN, year and user category.

    Parses the "Table Traffic Growth % " sheet (the trailing space is part of
    the sheet name) of the networks workbook.  Category headers are on row
    index 4 (0-based) and NREN rows are indices 5-45; each survey year
    occupies its own block of columns.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory) -> float``.  Cells
        that are empty or zero, and columns whose header is not a recognised
        category, are left out.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
    try:
        sheet_name = "Table Traffic Growth % "
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; the ten value columns follow it.
        for i in range(5, 46):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 11):
                category = get_category(rows[4][c].value)
                raw_value = rows[i][c].value
                growth = float(raw_value) if raw_value else None
                if category and growth:
                    result[(nren_name, year, category)] = growth

    create_points_for_year(2019, 40)
    create_points_for_year(2020, 26)
    create_points_for_year(2021, 14)
    create_points_for_year(2022, 2)
    return result
def fetch_average_traffic_excel_data():
    """Read the average traffic load per NREN, year and user category.

    Parses the "Average Traffic" sheet of the connected-users workbook.
    Category headers are on row index 3 (0-based) and NREN rows are indices
    5-47.  Every category spans two adjacent columns, read as the "from
    institutions" and "to institutions" values respectively.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory)`` mapped to a
        ``(from_inst, to_inst)`` tuple of ``int`` or ``None``.  An entry is
        only stored when at least one of the two values is non-empty and
        non-zero and the header is a recognised category.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Average Traffic"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; ten column pairs follow it.
        for i in range(5, 48):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 21, 2):
                # The category label is read from the first column of the pair.
                category = get_category(rows[3][c].value)
                raw_from = rows[i][c].value
                raw_to = rows[i][c + 1].value
                from_inst = int(raw_from) if raw_from else None
                to_inst = int(raw_to) if raw_to else None
                if category and (from_inst or to_inst):
                    result[(nren_name, year, category)] = (from_inst, to_inst)

    create_points_for_year(2019, 68)
    create_points_for_year(2020, 46)
    create_points_for_year(2021, 24)
    create_points_for_year(2022, 2)
    return result
def fetch_peak_traffic_excel_data():
    """Read the peak traffic load per NREN, year and user category.

    Parses the "Peak traffic" sheet of the connected-users workbook.  Category
    headers are on row index 4 (0-based) and NREN rows are indices 6-48.
    Every category spans two adjacent columns, read as the "from
    institutions" and "to institutions" values respectively.

    Returns:
        dict: ``(upper-cased NREN name, year, UserCategory)`` mapped to a
        ``(from_inst, to_inst)`` tuple of ``int`` or ``None``.  An entry is
        only stored when at least one of the two values is non-empty and
        non-zero and the header is a recognised category.
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Peak traffic"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file can be closed right away.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    result = {}

    def create_points_for_year(year, start_column):
        # start_column holds the NREN name; ten column pairs follow it.
        for i in range(6, 49):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()
            for c in range(start_column + 1, start_column + 21, 2):
                # The category label is read from the first column of the pair.
                category = get_category(rows[4][c].value)
                raw_from = rows[i][c].value
                raw_to = rows[i][c + 1].value
                from_inst = int(raw_from) if raw_from else None
                to_inst = int(raw_to) if raw_to else None
                if category and (from_inst or to_inst):
                    result[(nren_name, year, category)] = (from_inst, to_inst)

    create_points_for_year(2019, 70)
    create_points_for_year(2020, 47)
    create_points_for_year(2021, 24)
    create_points_for_year(2022, 2)
    return result
def fetch_remote_campuses_excel_data():
    """Yield remote-campus answers from the "Foreign Campuses" sheet.

    This is a generator: the workbook is only read once iteration starts.
    NREN rows are indices 5-47 (0-based); each survey year occupies a block
    of five columns (NREN name, has remote campuses, connectivity provided,
    country, connected to local R&E network).

    Only NRENs that answered "yes" (case-insensitive) to having remote
    campuses are reported.

    Yields:
        tuple: ``(upper-cased NREN name, year, connectivity, country,
        connected_to_r_e)`` where ``connectivity`` and ``connected_to_r_e``
        are booleans and ``country`` is a string (empty when not given).
    """
    wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
    try:
        sheet_name = "Foreign Campuses"
        ws = wb[sheet_name]
        # Materialise all rows up front so the file is closed before the
        # first item is yielded, however long the consumer keeps iterating.
        rows = list(ws.rows)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        wb.close()

    def create_points_for_year(year, start_column):
        for i in range(5, 48):
            nren_name = rows[i][start_column].value
            if not nren_name:
                continue
            nren_name = nren_name.upper()

            have_remote = rows[i][start_column + 1].value
            connectivity = rows[i][start_column + 2].value
            country = rows[i][start_column + 3].value
            connected_to_r_e = rows[i][start_column + 4].value

            if have_remote and have_remote.upper() == "YES":
                connectivity = connectivity.upper() == "YES" if connectivity else False
                # Any answer other than these "no / unknown" markers counts
                # as being connected to the local R&E network.
                connected_to_r_e = connected_to_r_e not in [None, "-", "Not connected.", "We do not know"]
                country = country or ""
                yield nren_name, year, connectivity, country, connected_to_r_e

    yield from create_points_for_year(2019, 22)
    yield from create_points_for_year(2020, 16)
    yield from create_points_for_year(2021, 10)
    yield from create_points_for_year(2022, 4)
......@@ -22,6 +22,8 @@ def get_uppercase_nren_dict():
# add aliases that are used in the source data:
nren_dict['ASNET'] = nren_dict['ASNET-AM']
nren_dict['KIFU (NIIF)'] = nren_dict['KIFU']
nren_dict['KIFÜ'] = nren_dict['KIFU']
nren_dict['NIIF/HUNGARNET'] = nren_dict['KIFU']
nren_dict['SURFNET'] = nren_dict['SURF']
nren_dict['UOM/RICERKANET'] = nren_dict['UNIVERSITY OF MALTA']
nren_dict['UOM'] = nren_dict['UNIVERSITY OF MALTA']
......
......@@ -269,18 +269,187 @@ def db_nren_services_migration(nren_dict):
db.session.commit()
def db_connected_proportion_migration(nren_dict):
    """Publish ``ConnectedProportion`` rows built from four Excel data series.

    The remit, number-connected, market-share and users-served series are
    combined on their shared ``(NREN name, year, user category)`` key; a value
    missing from one series is stored as ``None``.

    Args:
        nren_dict: mapping of upper-cased NREN names to NREN objects; entries
            whose name is not in this mapping are skipped with a warning.
    """
    remit = excel_parser.fetch_remit_excel_data()
    nr_connected = excel_parser.fetch_nr_connected_excel_data()
    market_share = excel_parser.fetch_market_share_excel_data()
    users_served = excel_parser.fetch_users_served_excel_data()

    # Union of the keys of every series.
    all_entry_keys = set()
    for series in (remit, nr_connected, market_share, users_served):
        all_entry_keys.update(series.keys())

    for entry_key in all_entry_keys:
        abbrev, year, user_category = entry_key
        if abbrev not in nren_dict:
            logger.warning(f'{abbrev} unknown. Skipping.')
            continue

        nren = nren_dict[abbrev]
        record = presentation_models.ConnectedProportion(
            nren=nren,
            nren_id=nren.id,
            year=year,
            user_category=user_category,
            coverage=remit.get(entry_key),
            number_connected=nr_connected.get(entry_key),
            market_share=market_share.get(entry_key),
            users_served=users_served.get(entry_key),
        )
        db.session.merge(record)

    db.session.commit()
def db_connectivity_level_migration(nren_dict):
    """Publish ``ConnectivityLevel`` rows built from three Excel data series.

    The typical-speed, highest-speed and highest-speed-proportion series are
    combined on their shared ``(NREN name, year, user category)`` key; a value
    missing from one series is stored as ``None``.

    Args:
        nren_dict: mapping of upper-cased NREN names to NREN objects; entries
            whose name is not in this mapping are skipped with a warning.
    """
    typical_speed = excel_parser.fetch_typical_speed_excel_data()
    highest_speed = excel_parser.fetch_highest_speed_excel_data()
    highest_speed_proportion = excel_parser.fetch_highest_speed_proportion_excel_data()

    # Union of the keys of every series.
    all_entry_keys = set()
    for series in (typical_speed, highest_speed, highest_speed_proportion):
        all_entry_keys.update(series.keys())

    for entry_key in all_entry_keys:
        abbrev, year, user_category = entry_key
        if abbrev not in nren_dict:
            logger.warning(f'{abbrev} unknown. Skipping.')
            continue

        nren = nren_dict[abbrev]
        record = presentation_models.ConnectivityLevel(
            nren=nren,
            nren_id=nren.id,
            year=year,
            user_category=user_category,
            typical_speed=typical_speed.get(entry_key),
            highest_speed=highest_speed.get(entry_key),
            highest_speed_proportion=highest_speed_proportion.get(entry_key),
        )
        db.session.merge(record)

    db.session.commit()
def db_connection_carrier_migration(nren_dict):
    """Publish ``ConnectionCarrier`` rows from the Excel carrier data.

    Args:
        nren_dict: mapping of upper-cased NREN names to NREN objects; entries
            whose name is not in this mapping are skipped with a warning.
    """
    carriers = excel_parser.fetch_carriers_excel_data()

    for (abbrev, year, user_category), carry_mechanism in carriers.items():
        if abbrev not in nren_dict:
            logger.warning(f'{abbrev} unknown. Skipping.')
            continue

        nren = nren_dict[abbrev]
        record = presentation_models.ConnectionCarrier(
            nren=nren,
            nren_id=nren.id,
            year=year,
            user_category=user_category,
            carry_mechanism=carry_mechanism,
        )
        db.session.merge(record)

    db.session.commit()
def db_connectivity_growth_migration(nren_dict):
    """Publish ``ConnectivityGrowth`` rows from the Excel traffic-growth data.

    Args:
        nren_dict: mapping of upper-cased NREN names to NREN objects; entries
            whose name is not in this mapping are skipped with a warning.
    """
    growth = excel_parser.fetch_growth_excel_data()

    for (abbrev, year, user_category), growth_percent in growth.items():
        if abbrev not in nren_dict:
            logger.warning(f'{abbrev} unknown. Skipping.')
            continue

        nren = nren_dict[abbrev]
        record = presentation_models.ConnectivityGrowth(
            nren=nren,
            nren_id=nren.id,
            year=year,
            user_category=user_category,
            growth=growth_percent,
        )
        db.session.merge(record)

    db.session.commit()
def db_connectivity_load_migration(nren_dict):
    """Publish ``ConnectivityLoad`` rows from the average and peak traffic data.

    Both series are keyed by ``(NREN name, year, user category)`` and hold a
    ``(from institutions, to institutions)`` pair; a key missing from one
    series contributes ``None`` for both of its values.

    Args:
        nren_dict: mapping of upper-cased NREN names to NREN objects; entries
            whose name is not in this mapping are skipped with a warning.
    """
    average = excel_parser.fetch_average_traffic_excel_data()
    peak = excel_parser.fetch_peak_traffic_excel_data()

    all_entry_keys = set()
    for series in (average, peak):
        all_entry_keys.update(series.keys())

    no_data = (None, None)
    for entry_key in all_entry_keys:
        abbrev, year, user_category = entry_key
        if abbrev not in nren_dict:
            logger.warning(f'{abbrev} unknown. Skipping.')
            continue

        nren = nren_dict[abbrev]
        average_pair = average.get(entry_key, no_data)
        peak_pair = peak.get(entry_key, no_data)
        record = presentation_models.ConnectivityLoad(
            nren=nren,
            nren_id=nren.id,
            year=year,
            user_category=user_category,
            average_load_from_institutions=average_pair[0],
            average_load_to_institutions=average_pair[1],
            peak_load_from_institutions=peak_pair[0],
            peak_load_to_institutions=peak_pair[1],
        )
        db.session.merge(record)

    db.session.commit()
def db_remote_campuses_migration(nren_dict):
    """Publish ``RemoteCampuses`` rows from the Excel foreign-campuses data.

    Each row gets a ``connections`` list with a single country entry, or an
    empty list when no country was given.

    Args:
        nren_dict: mapping of upper-cased NREN names to NREN objects; entries
            whose name is not in this mapping are skipped with a warning.
    """
    campuses = excel_parser.fetch_remote_campuses_excel_data()

    for abbrev, year, connectivity, country, connected_to_r_e in campuses:
        if abbrev not in nren_dict:
            logger.warning(f'{abbrev} unknown. Skipping.')
            continue

        if country:
            connections = [{'country': country, 'local_r_and_e_connection': connected_to_r_e}]
        else:
            connections = []

        nren = nren_dict[abbrev]
        record = presentation_models.RemoteCampuses(
            nren=nren,
            nren_id=nren.id,
            year=year,
            remote_campus_connectivity=connectivity,
            connections=connections,
        )
        db.session.merge(record)

    db.session.commit()
def _cli(app):
    """Run every Excel-based migration inside the application context.

    The migrations are executed in a fixed order; each one reads its own
    Excel data and writes the result to the database.

    Args:
        app: the application object; only ``app.app_context()`` is used.
    """
    with app.app_context():
        nren_dict = helpers.get_uppercase_nren_dict()

        db_budget_migration(nren_dict)
        db_funding_migration(nren_dict)
        db_charging_structure_migration(nren_dict)
        db_staffing_migration(nren_dict)
        db_ecprojects_migration(nren_dict)
        db_organizations_migration(nren_dict)
        db_traffic_volume_migration(nren_dict)
        db_services_migration()
        db_nren_services_migration(nren_dict)

        # Connected-users questions.  The first two were previously left
        # commented out, so their tables were never populated by this command.
        db_connected_proportion_migration(nren_dict)
        db_connectivity_level_migration(nren_dict)
        db_connection_carrier_migration(nren_dict)
        db_connectivity_growth_migration(nren_dict)
        db_connectivity_load_migration(nren_dict)
        db_remote_campuses_migration(nren_dict)
@click.command()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment