-
Remco Tukker authoredRemco Tukker authored
excel_parser.py 40.22 KiB
import logging
import openpyxl
from compendium_v2.conversion import mapping
from compendium_v2.db.presentation_model_enums import CarryMechanism, ConnectivityCoverage, MonitoringMethod, \
UserCategory, FeeType, YesNoPlanned
from compendium_v2.environment import setup_logging
from compendium_v2.resources import get_resource_file_path
setup_logging()
logger = logging.getLogger(__name__)
EXCEL_FILE_ORGANISATION = get_resource_file_path("2021_Organisation_DataSeries.xlsx")
EXCEL_FILE_USERS = get_resource_file_path("2022_Connected_Users_DataSeries.xlsx")
EXCEL_FILE_NETWORKS = get_resource_file_path("2022_Networks_DataSeries.xlsx")
EXCEL_FILE_NREN_SERVICES = get_resource_file_path("NREN-Services-prefills_2023_Recovered.xlsx")
def fetch_budget_excel_data():
# load the xlsx file
sheet_name = "1. Budget"
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
ws = wb[sheet_name]
# iterate over the rows in the worksheet
for row in range(14, 58):
for col in range(3, 9):
# extract the data from the row
nren = ws.cell(row=row, column=2).value
budget = ws.cell(row=row, column=col).value
year = ws.cell(row=13, column=col).value
if budget is not None:
budget = round(budget / 1000000, 2)
yield nren.upper(), budget, year
def fetch_funding_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "2. Income Sources"
ws = wb[sheet_name]
ws2 = wb["Income Sources table"]
def hard_number_convert(s, source_name, nren, year):
if s is None:
logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year})')
return float(0)
try:
return float(s)
except ValueError:
logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year}) with value ({s})')
return float(0)
# iterate over the rows in the worksheet
def create_points_for_year_until_2017(start_row, end_row, year, col_nren, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=col_nren).value
client_institution = ws.cell(row=row, column=col_start).value
commercial = ws.cell(row=row, column=col_start + 1).value
geant_subsidy = ws.cell(row=row, column=col_start + 2).value
gov_public_bodies = ws.cell(row=row, column=col_start + 3).value
other_european_funding = ws.cell(row=row, column=col_start + 4).value
other = ws.cell(row=row, column=col_start + 5).value
client_institution = hard_number_convert(client_institution, "client institution", nren, year)
commercial = hard_number_convert(commercial, "commercial", nren, year)
geant_subsidy = hard_number_convert(geant_subsidy, "geant subsidy", nren, year)
gov_public_bodies = hard_number_convert(gov_public_bodies, "gov/public_bodies", nren, year)
other_european_funding = hard_number_convert(other_european_funding, "other european funding", nren, year)
other = hard_number_convert(other, "other", nren, year)
european_funding = geant_subsidy + other_european_funding
# process the data (e.g. save to database)
if nren is not None:
yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
def create_points_for_year_from_2018(sheet, start_row, end_row, year, nren_col, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = sheet.cell(row=row, column=nren_col).value
client_institution = sheet.cell(row=row, column=col_start).value
european_funding = sheet.cell(row=row, column=col_start + 1).value
gov_public_bodies = sheet.cell(row=row, column=col_start + 2).value
commercial = sheet.cell(row=row, column=col_start + 3).value
other = sheet.cell(row=row, column=col_start + 4).value
client_institution = hard_number_convert(client_institution, "client institution", nren, year)
european_funding = hard_number_convert(european_funding, "european funding", nren, year)
gov_public_bodies = hard_number_convert(gov_public_bodies, "gov/public_bodies", nren, year)
commercial = hard_number_convert(commercial, "commercial", nren, year)
other = hard_number_convert(other, "other", nren, year)
# process the data (e.g. save to database)
if nren is not None:
yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
# For 2016
yield from create_points_for_year_until_2017(8, 51, 2016, 43, 45)
# For 2017
yield from create_points_for_year_until_2017(8, 51, 2017, 32, 35)
# For 2018
yield from create_points_for_year_from_2018(ws, 8, 51, 2018, 21, 24)
# For 2019
yield from create_points_for_year_from_2018(ws, 8, 51, 2019, 12, 15)
# For 2020
yield from create_points_for_year_from_2018(ws, 8, 51, 2020, 3, 6)
# For 2021
yield from create_points_for_year_from_2018(ws2, 8, 51, 2021, 11, 12)
def fetch_charging_structure_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "3. Charging mechanism"
ws = wb[sheet_name]
# iterate over the rows in the worksheet
def create_points_for_2021(start_row, end_row, year, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=col_start).value
charging_structure = ws.cell(row=row, column=col_start + 1).value
logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
if charging_structure is not None:
if "do not charge" in charging_structure:
charging_structure = FeeType.no_charge
elif "combination" in charging_structure:
charging_structure = FeeType.combination
elif "flat" in charging_structure:
charging_structure = FeeType.flat_fee
elif "usage-based" in charging_structure:
charging_structure = FeeType.usage_based_fee
elif "Other" in charging_structure:
charging_structure = FeeType.other
else:
charging_structure = None
logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
yield nren.upper(), year, charging_structure
def create_points_for_2019(start_row, end_row, year, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=col_start).value
charging_structure = ws.cell(row=row, column=col_start + 1).value
structure_2 = ws.cell(row=row, column=col_start + 2).value
logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, {structure_2}, Year: {year}')
if charging_structure is not None:
if structure_2 not in [None, '']:
charging_structure = FeeType.other
elif "do not charge" in charging_structure:
charging_structure = FeeType.no_charge
elif "combination" in charging_structure:
charging_structure = FeeType.combination
elif "flat" in charging_structure:
charging_structure = FeeType.flat_fee
elif "usage-based" in charging_structure:
charging_structure = FeeType.usage_based_fee
elif "Other" in charging_structure:
charging_structure = FeeType.other
else:
charging_structure = None
logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
yield nren.upper(), year, charging_structure
# For 2021
yield from create_points_for_2021(3, 46, 2021, 2)
# For 2019
yield from create_points_for_2019(3, 46, 2019, 6)
def fetch_staffing_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "4. Staff"
ws = wb[sheet_name]
start_row = 18
end_row = 61
def convert_number(value, nren, year, description):
try:
return float(value)
except (TypeError, ValueError):
logger.info(f'NREN: {nren} year: {year} has {value} for {description}; set to 0.')
return 0
def create_points_for_year(year, nren_column, start_column):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=nren_column).value
permanent = ws.cell(row=row, column=start_column).value
permanent = convert_number(permanent, nren, year, "permanent ftes")
subcontracted = ws.cell(row=row, column=start_column + 1).value
subcontracted = convert_number(subcontracted, nren, year, "subcontractor ftes")
if permanent + subcontracted > 0:
yield nren.upper(), year, permanent, subcontracted
# For 2016
yield from create_points_for_year(2016, 53, 55)
# For 2017
yield from create_points_for_year(2017, 43, 46)
# For 2018
yield from create_points_for_year(2018, 33, 36)
# For 2019
yield from create_points_for_year(2019, 23, 26)
# For 2020
yield from create_points_for_year(2020, 13, 16)
# For 2021
yield from create_points_for_year(2021, 2, 5)
def fetch_staff_function_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "5. Staff by Function"
ws = wb[sheet_name]
start_row = 14
end_row = 58
def convert_number(value, nren, year, description):
try:
return float(value)
except (TypeError, ValueError):
logger.info(f'NREN: {nren} year: {year} has {value} for {description}; set to 0.')
return 0
def read_cell_number(row, column, nren, year, description):
value = ws.cell(row=row, column=column).value
return convert_number(value, nren, year, description)
def create_points_for_year_until_2019(year, nren_column, start_column):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=nren_column).value
if nren is None:
continue
admin = read_cell_number(row, start_column, nren, year, "admin and finance ftes")
communication = read_cell_number(row, start_column + 1, nren, year, "communication ftes")
infosec = read_cell_number(row, start_column + 2, nren, year, "infosec ftes")
it = read_cell_number(row, start_column + 3, nren, year, "it and software dev ftes")
noc = read_cell_number(row, start_column + 4, nren, year, "NOC and engineering ftes")
others = read_cell_number(row, start_column + 5, nren, year, "other ftes")
technical = infosec + it + noc
non_technical = admin + communication + others
if technical + non_technical > 0:
yield nren.upper(), year, technical, non_technical
def create_points_for_year(year, nren_column, start_column):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=nren_column).value
if nren is None:
continue
technical = read_cell_number(row, start_column, nren, year, "technical ftes")
non_technical = read_cell_number(row, start_column + 1, nren, year, "non-technical ftes")
if technical + non_technical > 0:
yield nren.upper(), year, technical, non_technical
# For 2017
yield from create_points_for_year_until_2019(2017, 41, 43)
# For 2018
yield from create_points_for_year_until_2019(2018, 31, 33)
# For 2019
yield from create_points_for_year_until_2019(2019, 20, 22)
# For 2020
yield from create_points_for_year(2020, 12, 14)
# For 2021
yield from create_points_for_year(2021, 3, 5)
def fetch_ecproject_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "7. EC Projects"
ws = wb[sheet_name]
start_row = 6
def create_points_for_year(year, start_column, end_row):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=start_column).value
if nren is None:
continue
project = ws.cell(row=row, column=start_column + 1).value
if project is None:
continue
yield nren.upper(), year, project
yield from create_points_for_year(2017, 13, 165)
yield from create_points_for_year(2018, 10, 165)
yield from create_points_for_year(2019, 7, 165)
yield from create_points_for_year(2020, 4, 180)
yield from create_points_for_year(2021, 1, 173)
def fetch_organization_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_ORGANISATION, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "Organization"
ws = wb[sheet_name]
# iterate over the rows in the worksheet
for row in range(5, 48):
# extract the data from the row
nren = ws.cell(row=row, column=2).value
parent_org = ws.cell(row=row, column=4).value
if parent_org not in [None, 'NA', 'N/A']:
yield nren.upper(), 2021, parent_org
def fetch_traffic_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "Estimated_Traffic TByte"
ws = wb[sheet_name]
rows = list(ws.rows)
def convert_number(value, nren, year, description):
if value is None or value == '--' or value == 'No data':
return 0
try:
return float(value)
except (TypeError, ValueError):
logger.info(f'NREN: {nren} year: {year} has {value} for {description}; set to 0.')
return 0
def create_points_for_year(year, start_column):
for i in range(6, 49):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
from_external = convert_number(rows[i][start_column + 1].value, nren_name, year, 'from_external')
to_external = convert_number(rows[i][start_column + 2].value, nren_name, year, 'to_external')
from_customer = convert_number(rows[i][start_column + 3].value, nren_name, year, 'from_customer')
to_customer = convert_number(rows[i][start_column + 4].value, nren_name, year, 'to_customer')
if from_external == 0 and to_external == 0 and from_customer == 0 and to_customer == 0:
continue
yield nren_name, year, from_external, to_external, from_customer, to_customer
yield from create_points_for_year(2016, 38)
yield from create_points_for_year(2017, 32)
yield from create_points_for_year(2018, 26)
yield from create_points_for_year(2019, 20)
yield from create_points_for_year(2020, 14)
yield from create_points_for_year(2021, 8)
yield from create_points_for_year(2022, 2)
def fetch_nren_services_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NREN_SERVICES, data_only=True, read_only=True)
ws = wb["Sheet1"]
rows = list(ws.rows)
titles = rows[0]
nren_service_data_columns = {}
def normalize_nren_name(n: str) -> str:
n = n.split(' ')[0].upper()
return {'KIFÜ': 'KIFU', 'AZSCIENCENET': 'ANAS', 'PSNC': 'PIONIER'}.get(n, n)
for i in range(0, 131):
if titles[i].value:
name = normalize_nren_name(titles[i].value)
nren_service_data_columns[name] = i
for nren_name, start_column in nren_service_data_columns.items():
for row_index in range(2, 61):
row = rows[row_index]
service_name = row[0].value
if row[start_column].value and row[start_column].value.upper() == 'YES':
product_name = ''
additional_information = ''
if row[start_column + 1].value:
product_name = row[start_column + 1].value
if row[start_column + 2].value:
additional_information = row[start_column + 2].value
service_category, service_name_key = mapping.SERVICES_MAPPING[service_name].split(':')
service_category = mapping.SERVICE_CATEGORY_MAPPING[service_category]
yield {
'nren_name': nren_name,
'service_name': service_name,
'service_category': service_category,
'service_name_key': service_name_key,
'year': 2022,
'product_name': product_name.strip(),
'additional_information': additional_information.strip(),
'official_description': '',
}
def get_category(excel_cat):
if not excel_cat:
return None
if "universit" in excel_cat.lower():
return UserCategory.universities
if "research ins" in excel_cat.lower():
return UserCategory.institutes
if "further" in excel_cat.lower() or "fe" == excel_cat.lower():
return UserCategory.further_education
if "inter" in excel_cat.lower():
return UserCategory.iros
if "cultural" in excel_cat.lower() or "librar" in excel_cat.lower():
return UserCategory.cultural
if "hospital" in excel_cat.lower():
return UserCategory.hospitals
if "primary" in excel_cat.lower():
return UserCategory.primary_schools
if "secondary" in excel_cat.lower():
return UserCategory.secondary_schools
if "govern" in excel_cat.lower():
return UserCategory.government
if "profit" in excel_cat.lower():
return UserCategory.for_profit_orgs
logger.warning(f'unknown user category: {excel_cat}')
def fetch_remit_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Connectivity Remit"
ws = wb[sheet_name]
rows = list(ws.rows)
def get_remit(excel_remit):
if not excel_remit:
return None
if "including transit" in excel_remit.lower():
return ConnectivityCoverage.yes_incl_other
if "national nren" in excel_remit.lower():
return ConnectivityCoverage.yes_national_nren
if "some circ" in excel_remit.lower():
return ConnectivityCoverage.sometimes
if "policy reas" in excel_remit.lower():
return ConnectivityCoverage.no_policy
if "financial" in excel_remit.lower():
return ConnectivityCoverage.no_financial
if "other reason" in excel_remit.lower():
return ConnectivityCoverage.no_other
if "unsure" in excel_remit.lower():
return ConnectivityCoverage.unsure
logger.warning(f'unknown remit: {excel_remit}')
result = {}
def create_points_for_year(year, start_column):
for i in range(8, 51):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for col in range(start_column + 2, start_column + 21, 2):
c = col
if year == 2021 and col > 30:
c += 2
category = get_category(rows[7][c].value)
remit = get_remit(rows[i][c].value)
if category and remit:
result[(nren_name, year, category)] = remit
create_points_for_year(2019, 72)
create_points_for_year(2020, 50)
create_points_for_year(2021, 26)
create_points_for_year(2022, 3)
return result
def fetch_nr_connected_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Connected Institutions"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(5, 48):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[4][c].value)
nr_connected = int(rows[i][c].value) if rows[i][c].value else None
if category and nr_connected:
result[(nren_name, year, category)] = nr_connected
create_points_for_year(2019, 39)
create_points_for_year(2020, 27)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_market_share_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Table Market Share"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(8, 51):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[7][c].value)
percentage_connected = float(rows[i][c].value) if rows[i][c].value else None
if category and percentage_connected:
result[(nren_name, year, category)] = percentage_connected
create_points_for_year(2017, 64)
create_points_for_year(2018, 52)
create_points_for_year(2019, 40)
create_points_for_year(2020, 28)
create_points_for_year(2021, 16)
create_points_for_year(2022, 3)
return result
def fetch_users_served_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Users"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(4, 47):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[3][c].value)
users_connected = int(rows[i][c].value) if rows[i][c].value else None
if category and users_connected:
result[(nren_name, year, category)] = users_connected
create_points_for_year(2019, 40)
create_points_for_year(2020, 28)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_typical_speed_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Table _Typical IP Link capacity"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(33, 76):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[32][c].value)
typical_speed = int(rows[i][c].value) if rows[i][c].value else None
if category and typical_speed:
result[(nren_name, year, category)] = typical_speed
create_points_for_year(2017, 75)
create_points_for_year(2018, 50)
create_points_for_year(2019, 38)
create_points_for_year(2020, 26)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_highest_speed_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Table _Highest IP Link capacity"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(33, 76):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[32][c].value)
highest_speed = int(rows[i][c].value) if rows[i][c].value else None
if category and highest_speed:
result[(nren_name, year, category)] = highest_speed
create_points_for_year(2017, 64)
create_points_for_year(2018, 51)
create_points_for_year(2019, 38)
create_points_for_year(2020, 26)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_highest_speed_proportion_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Aver High cap conn Share"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(5, 48):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[4][c].value)
highest_speed = float(rows[i][c].value) if rows[i][c].value else None
if category and highest_speed:
result[(nren_name, year, category)] = highest_speed
create_points_for_year(2020, 27)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_carriers_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Traffic carriers"
ws = wb[sheet_name]
rows = list(ws.rows)
def get_carrier(excel_carrier):
if not excel_carrier:
return None
if "comme" in excel_carrier.lower():
return CarryMechanism.commercial_provider_backbone
if "man" in excel_carrier.lower():
return CarryMechanism.man
if "local loop" in excel_carrier.lower():
return CarryMechanism.nren_local_loops
if "other" in excel_carrier.lower():
return CarryMechanism.other
if "regional" in excel_carrier.lower():
return CarryMechanism.regional_nren_backbone
logger.warning(f'unknown carrier: {excel_carrier}')
result = {}
def create_points_for_year(year, start_column):
for i in range(3, 46):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[2][c].value)
carrier = get_carrier(rows[i][c].value)
if category and carrier:
result[(nren_name, year, category)] = carrier
create_points_for_year(2019, 40)
create_points_for_year(2020, 27)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_growth_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Table Traffic Growth % "
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(5, 46):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 11):
category = get_category(rows[4][c].value)
growth = float(rows[i][c].value) if rows[i][c].value else None
if category and growth:
result[(nren_name, year, category)] = growth
create_points_for_year(2019, 40)
create_points_for_year(2020, 26)
create_points_for_year(2021, 14)
create_points_for_year(2022, 2)
return result
def fetch_average_traffic_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Average Traffic"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(5, 48):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 21, 2):
category = get_category(rows[3][c].value)
from_inst = int(rows[i][c].value) if rows[i][c].value else None
to_inst = int(rows[i][c+1].value) if rows[i][c+1].value else None
if category and (from_inst or to_inst):
result[(nren_name, year, category)] = (from_inst, to_inst)
create_points_for_year(2019, 68)
create_points_for_year(2020, 46)
create_points_for_year(2021, 24)
create_points_for_year(2022, 2)
return result
def fetch_peak_traffic_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Peak traffic"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(6, 49):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
for c in range(start_column + 1, start_column + 21, 2):
category = get_category(rows[4][c].value)
from_inst = int(rows[i][c].value) if rows[i][c].value else None
to_inst = int(rows[i][c+1].value) if rows[i][c+1].value else None
if category and (from_inst or to_inst):
result[(nren_name, year, category)] = (from_inst, to_inst)
create_points_for_year(2019, 70)
create_points_for_year(2020, 47)
create_points_for_year(2021, 24)
create_points_for_year(2022, 2)
return result
def fetch_remote_campuses_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_USERS, data_only=True, read_only=True)
sheet_name = "Foreign Campuses"
ws = wb[sheet_name]
rows = list(ws.rows)
def create_points_for_year(year, start_column):
for i in range(5, 48):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
have_remote = rows[i][start_column + 1].value
connectivity = rows[i][start_column + 2].value
country = rows[i][start_column + 3].value
connected_to_r_e = rows[i][start_column + 4].value
if have_remote and have_remote.upper() == "YES":
connectivity = connectivity.upper() == "YES" if connectivity else False
connected_to_r_e = connected_to_r_e not in [None, "-", "Not connected.", "We do not know"]
country = country or ""
yield nren_name, year, connectivity, country, connected_to_r_e
yield from create_points_for_year(2019, 22)
yield from create_points_for_year(2020, 16)
yield from create_points_for_year(2021, 10)
yield from create_points_for_year(2022, 4)
def fetch_dark_fibre_iru_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Dark Fibre"
ws = wb[sheet_name]
rows = list(ws.rows)
def parse_int(excel_value):
if excel_value is None or excel_value == "":
return None
return int(str(excel_value).replace("-", "").replace(" ", "").replace(".", ""))
def create_points_for_year(year, start_column):
for i in range(10, 53):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
s = start_column
iru = ""
if year > 2019:
s += 1
iru = rows[i][s].value
elif parse_int(rows[i][s + 1].value) is not None:
iru = "Yes" if parse_int(rows[i][s + 1].value) else "No"
if iru:
length_in_country = parse_int(rows[i][s + 1].value)
length_out_country = parse_int(rows[i][s + 3].value)
iru = iru == "Yes"
yield nren_name, year, iru, length_in_country, length_out_country
yield from create_points_for_year(2016, 42)
yield from create_points_for_year(2017, 36)
yield from create_points_for_year(2018, 30)
yield from create_points_for_year(2019, 24)
yield from create_points_for_year(2020, 17)
yield from create_points_for_year(2021, 10)
yield from create_points_for_year(2022, 3)
def fetch_dark_fibre_installed_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Dark Fibre"
ws = wb[sheet_name]
rows = list(ws.rows)
def parse_int(excel_value):
if excel_value is None or excel_value == "":
return None
return int(str(excel_value).replace("-", "").replace(" ", "").replace(".", ""))
def create_points_for_year(year, start_column):
for i in range(10, 53):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
s = start_column
if year > 2019:
s += 1
installed_length = parse_int(rows[i][s + 2].value)
if installed_length is not None:
installed = bool(installed_length)
yield nren_name, year, installed, installed_length
yield from create_points_for_year(2016, 42)
yield from create_points_for_year(2017, 36)
yield from create_points_for_year(2018, 30)
yield from create_points_for_year(2019, 24)
yield from create_points_for_year(2020, 17)
yield from create_points_for_year(2021, 10)
yield from create_points_for_year(2022, 3)
def fetch_iru_duration_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "IRU duration"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(3, 46):
nren_name = rows[i][start_column].value
if not nren_name:
continue
nren_name = nren_name.upper()
years = rows[i][start_column + 1].value
if not years:
continue
years = str(years).split(" ")[0].split("+")[0].split("-")[0]
if not years:
continue
try:
years = int(years)
except ValueError:
logger.warning(f'Invalid iru duration Value :{nren_name} ({year}) with value ({years})')
continue
result[(nren_name, year)] = years
create_points_for_year(2019, 10)
create_points_for_year(2020, 7)
create_points_for_year(2021, 4)
create_points_for_year(2022, 1)
return result
def fetch_passive_monitoring_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Traffic monitoring"
ws = wb[sheet_name]
rows = list(ws.rows)
def create_points_for_year(year, start_column):
for i in range(6, 48):
nren_name = rows[i][1].value
if not nren_name:
continue
nren_name = nren_name.upper()
monitoring = rows[i][start_column].value
method = rows[i][start_column + 1].value
if monitoring:
monitoring = monitoring == "Yes"
method = {
"SPAN ports": MonitoringMethod.span_ports,
"Passive optical TAPS": MonitoringMethod.taps,
"Both": MonitoringMethod.both,
None: None
}[method]
yield nren_name, year, monitoring, method
yield from create_points_for_year(2021, 4)
yield from create_points_for_year(2022, 2)
def fetch_largest_link_capacity_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Largest IP Trunk capacity"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(5, 47):
nren_name = rows[i][5].value
if not nren_name:
continue
nren_name = nren_name.upper()
largest_capacity = rows[i][start_column].value
if largest_capacity:
result[(nren_name, year)] = int(largest_capacity)
create_points_for_year(2016, 12)
create_points_for_year(2017, 11)
create_points_for_year(2018, 10)
create_points_for_year(2019, 9)
create_points_for_year(2020, 8)
create_points_for_year(2021, 7)
create_points_for_year(2022, 6)
return result
def fetch_typical_backbone_capacity_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Typical IP Trunk capacity"
ws = wb[sheet_name]
rows = list(ws.rows)
result = {}
def create_points_for_year(year, start_column):
for i in range(5, 47):
nren_name = rows[i][4].value
if not nren_name:
continue
nren_name = nren_name.upper()
typical_capacity = rows[i][start_column].value
if typical_capacity:
result[(nren_name, year)] = int(typical_capacity)
create_points_for_year(2016, 11)
create_points_for_year(2017, 10)
create_points_for_year(2018, 9)
create_points_for_year(2019, 8)
create_points_for_year(2020, 7)
create_points_for_year(2021, 6)
create_points_for_year(2022, 5)
return result
def fetch_non_r_e_peers_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Peering-Non R& Network"
ws = wb[sheet_name]
rows = list(ws.rows)
def create_points_for_year(year, start_column):
for i in range(5, 48):
nren_name = rows[i][2].value
if not nren_name:
continue
nren_name = nren_name.upper()
nr_peers = rows[i][start_column].value
if nr_peers:
yield nren_name, year, int(nr_peers)
yield from create_points_for_year(2016, 10)
yield from create_points_for_year(2017, 9)
yield from create_points_for_year(2018, 8)
yield from create_points_for_year(2019, 7)
yield from create_points_for_year(2020, 6)
yield from create_points_for_year(2021, 5)
yield from create_points_for_year(2022, 3)
def fetch_ops_automation_excel_data():
wb = openpyxl.load_workbook(EXCEL_FILE_NETWORKS, data_only=True, read_only=True)
sheet_name = "Automation"
ws = wb[sheet_name]
rows = list(ws.rows)
def create_points_for_year(year, start_column):
for i in range(5, 48):
nren_name = rows[i][1].value
if not nren_name:
continue
nren_name = nren_name.upper()
automation = rows[i][start_column].value
specifics = rows[i][start_column + 1].value or ""
if automation:
automation = YesNoPlanned[automation.lower()]
yield nren_name, year, automation, specifics
yield from create_points_for_year(2021, 5)
yield from create_points_for_year(2022, 3)