Skip to content
Snippets Groups Projects
Commit fef8db90 authored by Mohammad Torkashvand
Browse files

refactor publisher and make excel files consistent

parent d6f91bd5
No related branches found
No related tags found
No related merge requests found
This commit is part of merge request !78. Comments created here will be created in the context of that merge request.
...@@ -30,7 +30,9 @@ setup_logging() ...@@ -30,7 +30,9 @@ setup_logging()
logger = logging.getLogger('conversion') logger = logging.getLogger('conversion')
EXCEL_FILE = os.path.join(os.path.dirname(__file__), "NREN-Services-prefills_2023_Recovered.xlsx") resources_dir = f"{os.path.abspath(os.path.join( os.path.dirname(__file__), os.pardir))}/resources"
EXCEL_NREN_SERVICES_2023 = os.path.join(resources_dir, "NREN-Services-prefills_2023_Recovered.xlsx")
def query_nren(nren_id: int): def query_nren(nren_id: int):
...@@ -112,7 +114,7 @@ def convert_answers(answers): ...@@ -112,7 +114,7 @@ def convert_answers(answers):
def load_service_data(): def load_service_data():
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_NREN_SERVICES_2023, data_only=True, read_only=True)
ws = wb["Sheet1"] ws = wb["Sheet1"]
rows = list(ws.rows) rows = list(ws.rows)
......
...@@ -9,14 +9,17 @@ setup_logging() ...@@ -9,14 +9,17 @@ setup_logging()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
EXCEL_FILE = os.path.join(os.path.dirname(__file__), "xlsx", "2021_Organisation_DataSeries.xlsx") resources_dir = f"{os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))}/resources"
NETWORK_EXCEL_FILE = os.path.join(os.path.dirname(__file__), "xlsx", "2022_Networks_DataSeries.xlsx")
EXCEL_ORGANISATION_2021 = os.path.join(resources_dir, "2021_Organisation_DataSeries.xlsx")
EXCEL_CONNECTED_USERS_2022 = os.path.join(resources_dir, "2022_Connected_Users_DataSeries.xlsx")
EXCEL_NETWORKS_2022 = os.path.join(os.path.dirname(__file__), "2022_Networks_DataSeries.xlsx")
def fetch_budget_excel_data():
def fetch_budget_data():
# load the xlsx file # load the xlsx file
sheet_name = "1. Budget" sheet_name = "1. Budget"
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
ws = wb[sheet_name] ws = wb[sheet_name]
...@@ -34,9 +37,9 @@ def fetch_budget_excel_data(): ...@@ -34,9 +37,9 @@ def fetch_budget_excel_data():
yield nren.upper(), budget, year yield nren.upper(), budget, year
def fetch_funding_excel_data(): def fetch_funding_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "2. Income Sources" sheet_name = "2. Income Sources"
...@@ -117,9 +120,9 @@ def fetch_funding_excel_data(): ...@@ -117,9 +120,9 @@ def fetch_funding_excel_data():
yield from create_points_for_year_from_2018(ws2, 8, 51, 2021, 11, 12) yield from create_points_for_year_from_2018(ws2, 8, 51, 2021, 11, 12)
def fetch_charging_structure_excel_data(): def fetch_charging_structure_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "3. Charging mechanism" sheet_name = "3. Charging mechanism"
...@@ -184,9 +187,9 @@ def fetch_charging_structure_excel_data(): ...@@ -184,9 +187,9 @@ def fetch_charging_structure_excel_data():
yield from create_points_for_2019(3, 46, 2019, 6) yield from create_points_for_2019(3, 46, 2019, 6)
def fetch_staffing_excel_data(): def fetch_staffing_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "4. Staff" sheet_name = "4. Staff"
...@@ -232,9 +235,9 @@ def fetch_staffing_excel_data(): ...@@ -232,9 +235,9 @@ def fetch_staffing_excel_data():
yield from create_points_for_year(2021, 2, 5) yield from create_points_for_year(2021, 2, 5)
def fetch_staff_function_excel_data(): def fetch_staff_function_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "5. Staff by Function" sheet_name = "5. Staff by Function"
...@@ -300,9 +303,9 @@ def fetch_staff_function_excel_data(): ...@@ -300,9 +303,9 @@ def fetch_staff_function_excel_data():
yield from create_points_for_year(2021, 3, 5) yield from create_points_for_year(2021, 3, 5)
def fetch_ecproject_excel_data(): def fetch_ecproject_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "7. EC Projects" sheet_name = "7. EC Projects"
...@@ -332,9 +335,9 @@ def fetch_ecproject_excel_data(): ...@@ -332,9 +335,9 @@ def fetch_ecproject_excel_data():
yield from create_points_for_year(2021, 1, 173) yield from create_points_for_year(2021, 1, 173)
def fetch_organization_excel_data(): def fetch_organization_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_ORGANISATION_2021, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "Organization" sheet_name = "Organization"
...@@ -352,7 +355,7 @@ def fetch_organization_excel_data(): ...@@ -352,7 +355,7 @@ def fetch_organization_excel_data():
def fetch_traffic_excel_data(): def fetch_traffic_excel_data():
# load the xlsx file # load the xlsx file
wb = openpyxl.load_workbook(NETWORK_EXCEL_FILE, data_only=True, read_only=True) wb = openpyxl.load_workbook(EXCEL_NETWORKS_2022, data_only=True, read_only=True)
# select the active worksheet # select the active worksheet
sheet_name = "Estimated_Traffic TByte" sheet_name = "Estimated_Traffic TByte"
......
""" """
survey_publisher_v1 excel_publisher
========================= =========================
This module loads the survey data from before 2022 from an excel file. This module loads the survey data from before 2022 from an excel file.
...@@ -16,15 +16,14 @@ from sqlalchemy import select ...@@ -16,15 +16,14 @@ from sqlalchemy import select
import compendium_v2 import compendium_v2
from compendium_v2.environment import setup_logging from compendium_v2.environment import setup_logging
from compendium_v2.background_task import parse_excel_data
from compendium_v2.config import load from compendium_v2.config import load
from compendium_v2.db import db, model from compendium_v2.db import db, model
from compendium_v2.survey_db import model as survey_model from compendium_v2.survey_db import model as survey_model
from compendium_v2.publishers import helpers from compendium_v2.publishers import helpers, excel_parser
setup_logging() setup_logging()
logger = logging.getLogger('survey-publisher-v1') logger = logging.getLogger(__name__)
def db_budget_migration(nren_dict): def db_budget_migration(nren_dict):
...@@ -55,7 +54,7 @@ def db_budget_migration(nren_dict): ...@@ -55,7 +54,7 @@ def db_budget_migration(nren_dict):
db.session.merge(budget_entry) db.session.merge(budget_entry)
# Import the data from excel sheet to database # Import the data from excel sheet to database
exceldata = parse_excel_data.fetch_budget_excel_data() exceldata = excel_parser.fetch_budget_data()
for abbrev, budget, year in exceldata: for abbrev, budget, year in exceldata:
if abbrev not in nren_dict: if abbrev not in nren_dict:
...@@ -77,7 +76,7 @@ def db_budget_migration(nren_dict): ...@@ -77,7 +76,7 @@ def db_budget_migration(nren_dict):
def db_funding_migration(nren_dict): def db_funding_migration(nren_dict):
# Import the data to database # Import the data to database
data = parse_excel_data.fetch_funding_excel_data() data = excel_parser.fetch_funding_data()
for (abbrev, year, client_institution, for (abbrev, year, client_institution,
european_funding, european_funding,
...@@ -108,7 +107,7 @@ def db_funding_migration(nren_dict): ...@@ -108,7 +107,7 @@ def db_funding_migration(nren_dict):
def db_charging_structure_migration(nren_dict): def db_charging_structure_migration(nren_dict):
# Import the data to database # Import the data to database
data = parse_excel_data.fetch_charging_structure_excel_data() data = excel_parser.fetch_charging_structure_data()
for (abbrev, year, charging_structure) in data: for (abbrev, year, charging_structure) in data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
...@@ -126,7 +125,7 @@ def db_charging_structure_migration(nren_dict): ...@@ -126,7 +125,7 @@ def db_charging_structure_migration(nren_dict):
def db_staffing_migration(nren_dict): def db_staffing_migration(nren_dict):
staff_data = parse_excel_data.fetch_staffing_excel_data() staff_data = excel_parser.fetch_staffing_data()
nren_staff_map = {} nren_staff_map = {}
for (abbrev, year, permanent_fte, subcontracted_fte) in staff_data: for (abbrev, year, permanent_fte, subcontracted_fte) in staff_data:
...@@ -145,7 +144,7 @@ def db_staffing_migration(nren_dict): ...@@ -145,7 +144,7 @@ def db_staffing_migration(nren_dict):
non_technical_fte=0 non_technical_fte=0
) )
function_data = parse_excel_data.fetch_staff_function_excel_data() function_data = excel_parser.fetch_staff_function_data()
for (abbrev, year, technical_fte, non_technical_fte) in function_data: for (abbrev, year, technical_fte, non_technical_fte) in function_data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping staff function data.') logger.warning(f'{abbrev} unknown. Skipping staff function data.')
...@@ -179,7 +178,7 @@ def db_staffing_migration(nren_dict): ...@@ -179,7 +178,7 @@ def db_staffing_migration(nren_dict):
def db_ecprojects_migration(nren_dict): def db_ecprojects_migration(nren_dict):
ecproject_data = parse_excel_data.fetch_ecproject_excel_data() ecproject_data = excel_parser.fetch_ecproject_data()
for (abbrev, year, project) in ecproject_data: for (abbrev, year, project) in ecproject_data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.') logger.warning(f'{abbrev} unknown. Skipping.')
...@@ -192,7 +191,7 @@ def db_ecprojects_migration(nren_dict): ...@@ -192,7 +191,7 @@ def db_ecprojects_migration(nren_dict):
def db_organizations_migration(nren_dict): def db_organizations_migration(nren_dict):
organization_data = parse_excel_data.fetch_organization_excel_data() organization_data = excel_parser.fetch_organization_data()
for (abbrev, year, org) in organization_data: for (abbrev, year, org) in organization_data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.warning(f'{abbrev} unknown. Skipping.') logger.warning(f'{abbrev} unknown. Skipping.')
......
import os
from sqlalchemy import select, func
from compendium_v2 import db
from compendium_v2.db import model
from compendium_v2.publishers.excel_publisher import _cli
# Test copy of the organisation workbook, kept in the "data" directory next to this module.
_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
EXCEL_FILE = os.path.join(_DATA_DIR, "2021_Organisation_DataSeries.xlsx")
def test_publisher(app_with_survey_db, mocker, dummy_config):
    """Run the excel publisher CLI on the test workbook and spot-check what was stored.

    The parser's ``EXCEL_ORGANISATION_2021`` path is patched to ``EXCEL_FILE``,
    ten NRENs are inserted, ``_cli`` is invoked, and the resulting budget,
    funding, charging-structure, staff, EC-project and parent-organisation
    rows are then checked.
    """
    # Point the parser at the test workbook instead of the packaged one.
    mocker.patch('compendium_v2.publishers.excel_parser.EXCEL_ORGANISATION_2021', EXCEL_FILE)

    nren_names = ['SURF', 'KIFU', 'University of Malta', 'ASNET-AM', 'SIKT', 'LAT', 'RASH', 'ANAS', 'GRNET', 'CSC']

    with app_with_survey_db.app_context():
        nrens = [model.NREN(name=nren_name, country='country') for nren_name in nren_names]
        db.session.add_all(nrens)
        db.session.commit()

    _cli(dummy_config, app_with_survey_db)

    with app_with_survey_db.app_context():
        # Each of these tables must have received at least one row.
        for counted_column in (
            model.BudgetEntry.year,
            model.FundingSource.year,
            model.ChargingStructure.year,
        ):
            assert db.session.scalar(select(func.count(counted_column)))

        staff_query = select(model.NrenStaff).order_by(model.NrenStaff.year.asc())
        staff_data = db.session.scalars(staff_query).all()

        # data should only be saved for the NRENs we have saved in the database
        staff_data_nrens = {staff.nren.name for staff in staff_data}
        assert len(staff_data_nrens) == len(nren_names) - 1  # no UoM data

        # check that the data is saved correctly for KIFU, it should be OK for the rest then..
        kifu_data = [staff for staff in staff_data if staff.nren.name == 'KIFU']
        expected_kifu = [
            # (year, permanent_fte, subcontracted_fte, technical_fte, non_technical_fte)
            (2016, 100, 2, 0, 0),
            (2017, 80, 2, 0, 0),
            (2018, 80, 3, 0, 0),
            (2019, 148, 4, 117, 33),
            (2020, 190, 3, 133, 60),
            (2021, 178, 3, 133, 45),
        ]
        assert len(kifu_data) == len(expected_kifu)
        for row, (year, permanent, subcontracted, technical, non_technical) in zip(kifu_data, expected_kifu):
            assert row.year == year
            assert row.permanent_fte == permanent
            assert row.subcontracted_fte == subcontracted
            assert row.technical_fte == technical
            assert row.non_technical_fte == non_technical

        def entries_for(rows, nren_name, year):
            # Keep only the rows belonging to one NREN in one year.
            return [row for row in rows if row.nren.name == nren_name and row.year == year]

        ecproject_data = db.session.scalars(select(model.ECProject)).all()

        # test a couple of random entries
        surf2017 = entries_for(ecproject_data, 'SURF', 2017)
        assert len(surf2017) == 1
        assert surf2017[0].project == 'Asterics and Magic'

        asnetam2018 = entries_for(ecproject_data, 'ASNET-AM', 2018)
        assert len(asnetam2018) == 1
        assert asnetam2018[0].project == 'EaPConnect'

        kifu2019 = entries_for(ecproject_data, 'KIFU', 2019)
        assert len(kifu2019) == 4
        assert kifu2019[3].project == 'SuperHeroes for Science'

        parent_data = db.session.scalars(select(model.ParentOrganization)).all()

        # test a random entry
        asnet2021 = entries_for(parent_data, 'ASNET-AM', 2021)
        assert len(asnet2021) == 1
        assert asnet2021[0].organization == (
            'Institute for Informatics and Automation Problems of the National Academy of Sciences of Armenia'
        )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment