Skip to content
Snippets Groups Projects
Commit 95dca95c authored by Remco Tukker's avatar Remco Tukker
Browse files

Merge branch 'feature/COMP-168_v1_publisher_ec_projects' into 'develop'

Feature/comp 168 v1 publisher ec projects

See merge request !14
parents 989681f5 d3e16659
No related branches found
No related tags found
1 merge request: !14 Feature/comp 168 v1 publisher ec projects
...@@ -20,8 +20,8 @@ def fetch_budget_excel_data(): ...@@ -20,8 +20,8 @@ def fetch_budget_excel_data():
# select the active worksheet # select the active worksheet
ws = wb[sheet_name] ws = wb[sheet_name]
# iterate over the rows in the worksheet # iterate over the rows in the worksheet
for row in range(14, 57): for row in range(14, 58):
for col in range(3, 8): for col in range(3, 9):
# extract the data from the row # extract the data from the row
nren = ws.cell(row=row, column=2).value nren = ws.cell(row=row, column=2).value
budget = ws.cell(row=row, column=col).value budget = ws.cell(row=row, column=col).value
...@@ -29,8 +29,6 @@ def fetch_budget_excel_data(): ...@@ -29,8 +29,6 @@ def fetch_budget_excel_data():
if budget is not None: if budget is not None:
budget = round(budget / 1000000, 2) budget = round(budget / 1000000, 2)
if budget > 200:
logger.info(f'{nren} has budget set to >200M EUR for {year}. ({budget})')
yield nren.upper(), budget, year yield nren.upper(), budget, year
...@@ -99,19 +97,19 @@ def fetch_funding_excel_data(): ...@@ -99,19 +97,19 @@ def fetch_funding_excel_data():
yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other) yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
# For 2016 # For 2016
yield from create_points_for_year_until_2017(8, 50, 2016, 43, 45) yield from create_points_for_year_until_2017(8, 51, 2016, 43, 45)
# For 2017 # For 2017
yield from create_points_for_year_until_2017(8, 50, 2017, 32, 35) yield from create_points_for_year_until_2017(8, 51, 2017, 32, 35)
# For 2018 # For 2018
yield from create_points_for_year_from_2018(8, 50, 2018, 21) yield from create_points_for_year_from_2018(8, 51, 2018, 21)
# For 2019 # For 2019
yield from create_points_for_year_from_2018(8, 50, 2019, 12) yield from create_points_for_year_from_2018(8, 51, 2019, 12)
# For 2020 # For 2020
yield from create_points_for_year_from_2018(8, 50, 2020, 3) yield from create_points_for_year_from_2018(8, 51, 2020, 3)
def fetch_charging_structure_excel_data(): def fetch_charging_structure_excel_data():
...@@ -148,10 +146,10 @@ def fetch_charging_structure_excel_data(): ...@@ -148,10 +146,10 @@ def fetch_charging_structure_excel_data():
yield nren.upper(), year, charging_structure yield nren.upper(), year, charging_structure
# For 2021 # For 2021
yield from create_points_for_year(3, 45, 2021, 2) yield from create_points_for_year(3, 46, 2021, 2)
# For 2019 # For 2019
yield from create_points_for_year(3, 45, 2019, 6) yield from create_points_for_year(3, 46, 2019, 6)
def fetch_staffing_excel_data(): def fetch_staffing_excel_data():
...@@ -268,3 +266,53 @@ def fetch_staff_function_excel_data(): ...@@ -268,3 +266,53 @@ def fetch_staff_function_excel_data():
# For 2021 # For 2021
yield from create_points_for_year(2021, 3, 5) yield from create_points_for_year(2021, 3, 5)
def fetch_ecproject_excel_data():
    """
    Yield the EC project entries found in the "7. EC Projects" worksheet.

    Every survey year uses two adjacent columns: the NREN name in
    ``start_column`` and the project name in ``start_column + 1``. Rows are
    read from row 6 up to, but not including, the end row given per year.
    Rows where either of the two cells is empty are skipped.

    :return: generator of ``(nren_name_uppercased, year, project)`` tuples
    """
    # load the xlsx file
    wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)
    try:
        # select the EC projects worksheet
        ws = wb["7. EC Projects"]

        start_row = 6

        def create_points_for_year(year, start_column, end_row):
            for row in range(start_row, end_row):
                # extract the data from the row
                nren = ws.cell(row=row, column=start_column).value
                if nren is None:
                    continue
                project = ws.cell(row=row, column=start_column + 1).value
                if project is None:
                    continue

                yield nren.upper(), year, project

        yield from create_points_for_year(2017, 13, 165)
        yield from create_points_for_year(2018, 10, 165)
        yield from create_points_for_year(2019, 7, 165)
        yield from create_points_for_year(2020, 4, 180)
        yield from create_points_for_year(2021, 1, 173)
    finally:
        # A read-only workbook keeps the underlying file open until it is
        # closed explicitly; the finally clause also runs when the consumer
        # abandons the generator early.
        wb.close()
def fetch_organization_excel_data():
    """
    Yield the parent organization of each NREN from the "Organization" worksheet.

    Rows 5 up to and including 47 are read; the NREN name is taken from
    column 2 and the parent organization from column 4. Rows without an NREN
    name, or whose parent organization is empty, 'NA' or 'N/A', are skipped.
    All entries are reported for the year 2021.

    :return: generator of ``(nren_name_uppercased, 2021, parent_org)`` tuples
    """
    # load the xlsx file
    wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)
    try:
        # select the organization worksheet
        ws = wb["Organization"]

        # iterate over the rows in the worksheet
        for row in range(5, 48):
            # extract the data from the row
            nren = ws.cell(row=row, column=2).value
            parent_org = ws.cell(row=row, column=4).value

            # An empty NREN cell cannot be upper-cased; skip it the same way
            # the EC projects parser skips rows without an NREN name.
            if nren is None or parent_org in (None, 'NA', 'N/A'):
                continue

            yield nren.upper(), 2021, parent_org
    finally:
        # A read-only workbook keeps the underlying file open until it is
        # closed explicitly.
        wb.close()
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
survey_publisher_v1 survey_publisher_v1
========================= =========================
This module loads the survey data from before 2022 from and excel file. This module loads the survey data from before 2022 from an excel file.
Missing info is filled in from the survey db for some questions. Missing info is filled in from the survey db for some questions.
Registered as click cli command when installing compendium-v2. Registered as click cli command when installing compendium-v2.
...@@ -38,10 +38,10 @@ def db_budget_migration(): ...@@ -38,10 +38,10 @@ def db_budget_migration():
year = budget.year year = budget.year
if float(budget.budget) > 200: if float(budget.budget) > 200:
logger.info(f'Incorrect Data: {abbrev} has budget set to >200M EUR for {year}. ({budget.budget})') logger.warning(f'Incorrect Data: {abbrev} has budget set >200M EUR for {year}. ({budget.budget})')
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.info(f'{abbrev} unknown. Skipping.') logger.warning(f'{abbrev} unknown. Skipping.')
continue continue
budget_entry = model.BudgetEntry( budget_entry = model.BudgetEntry(
...@@ -56,9 +56,12 @@ def db_budget_migration(): ...@@ -56,9 +56,12 @@ def db_budget_migration():
for abbrev, budget, year in exceldata: for abbrev, budget, year in exceldata:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.info(f'{abbrev} unknown. Skipping.') logger.warning(f'{abbrev} unknown. Skipping.')
continue continue
if budget > 200:
logger.warning(f'{nren} has budget set to >200M EUR for {year}. ({budget})')
budget_entry = model.BudgetEntry(nren=nren_dict[abbrev], budget=budget, year=year) budget_entry = model.BudgetEntry(nren=nren_dict[abbrev], budget=budget, year=year)
session.merge(budget_entry) session.merge(budget_entry)
session.commit() session.commit()
...@@ -78,11 +81,11 @@ def db_funding_migration(): ...@@ -78,11 +81,11 @@ def db_funding_migration():
_data = [client_institution, european_funding, gov_public_bodies, commercial, other] _data = [client_institution, european_funding, gov_public_bodies, commercial, other]
total = sum(_data) total = sum(_data)
if not math.isclose(total, 100, abs_tol=0.01): if not math.isclose(total, 100, abs_tol=0.01) and total != 0:
logger.info(f'{abbrev} funding sources for {year} do not sum to 100% ({total})') logger.warning(f'{abbrev} funding sources for {year} do not sum to 100% ({total})')
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.info(f'{abbrev} unknown. Skipping.') logger.warning(f'{abbrev} unknown. Skipping.')
continue continue
budget_entry = model.FundingSource( budget_entry = model.FundingSource(
...@@ -106,7 +109,7 @@ def db_charging_structure_migration(): ...@@ -106,7 +109,7 @@ def db_charging_structure_migration():
for (abbrev, year, charging_structure) in data: for (abbrev, year, charging_structure) in data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.info(f'{abbrev} unknown. Skipping.') logger.warning(f'{abbrev} unknown. Skipping.')
continue continue
charging_structure_entry = model.ChargingStructure( charging_structure_entry = model.ChargingStructure(
...@@ -124,7 +127,7 @@ def db_staffing_migration(): ...@@ -124,7 +127,7 @@ def db_staffing_migration():
nren_staff_map = {} nren_staff_map = {}
for (abbrev, year, permanent_fte, subcontracted_fte) in staff_data: for (abbrev, year, permanent_fte, subcontracted_fte) in staff_data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.info(f'{abbrev} unknown. Skipping staff data.') logger.warning(f'{abbrev} unknown. Skipping staff data.')
continue continue
nren = nren_dict[abbrev] nren = nren_dict[abbrev]
...@@ -141,7 +144,7 @@ def db_staffing_migration(): ...@@ -141,7 +144,7 @@ def db_staffing_migration():
function_data = parse_excel_data.fetch_staff_function_excel_data() function_data = parse_excel_data.fetch_staff_function_excel_data()
for (abbrev, year, technical_fte, non_technical_fte) in function_data: for (abbrev, year, technical_fte, non_technical_fte) in function_data:
if abbrev not in nren_dict: if abbrev not in nren_dict:
logger.info(f'{abbrev} unknown. Skipping staff function data.') logger.warning(f'{abbrev} unknown. Skipping staff function data.')
continue continue
nren = nren_dict[abbrev] nren = nren_dict[abbrev]
...@@ -162,21 +165,55 @@ def db_staffing_migration(): ...@@ -162,21 +165,55 @@ def db_staffing_migration():
for nren_staff_model in nren_staff_map.values(): for nren_staff_model in nren_staff_map.values():
employed = nren_staff_model.permanent_fte + nren_staff_model.subcontracted_fte employed = nren_staff_model.permanent_fte + nren_staff_model.subcontracted_fte
technical = nren_staff_model.technical_fte + nren_staff_model.non_technical_fte technical = nren_staff_model.technical_fte + nren_staff_model.non_technical_fte
if not math.isclose(employed, technical, abs_tol=0.01): if not math.isclose(employed, technical, abs_tol=0.01) and employed != 0 and technical != 0:
logger.info(f'{nren_staff_model.nren.name} in {nren_staff_model.year}:' logger.warning(f'{nren_staff_model.nren.name} in {nren_staff_model.year}:'
f' FTE do not equal across employed/technical categories ({employed} != {technical})') f' FTE do not equal across employed/technical categories ({employed} != {technical})')
session.merge(nren_staff_model) session.merge(nren_staff_model)
session.commit() session.commit()
def db_ecprojects_migration():
    """
    Store the EC project entries parsed from the excel file in the database.

    Entries whose NREN abbreviation is not present in the upper-cased NREN
    lookup are logged as a warning and left out. Everything else is merged
    into the session, which is committed once after all rows are processed.
    """
    with db.session_scope() as session:
        known_nrens = helpers.get_uppercase_nren_dict(session)
        rows = parse_excel_data.fetch_ecproject_excel_data()

        for abbrev, year, project in rows:
            if abbrev in known_nrens:
                nren = known_nrens[abbrev]
                session.merge(
                    model.ECProject(nren=nren, nren_id=nren.id, year=year, project=project)
                )
            else:
                logger.warning(f'{abbrev} unknown. Skipping.')

        session.commit()
def db_organizations_migration():
    """
    Store the parent organizations parsed from the excel file in the database.

    Rows referring to an NREN abbreviation that is missing from the
    upper-cased NREN lookup produce a warning and are not stored. The
    session is committed once, after the last row.
    """
    with db.session_scope() as session:
        known_nrens = helpers.get_uppercase_nren_dict(session)

        for record in parse_excel_data.fetch_organization_excel_data():
            abbrev, year, org = record

            if abbrev not in known_nrens:
                logger.warning(f'{abbrev} unknown. Skipping.')
                continue

            nren = known_nrens[abbrev]
            session.merge(
                model.ParentOrganization(
                    nren=nren,
                    nren_id=nren.id,
                    year=year,
                    organization=org,
                )
            )

        session.commit()
def _cli(config): def _cli(config):
helpers.init_db(config) helpers.init_db(config)
db_budget_migration() db_budget_migration()
db_funding_migration() db_funding_migration()
db_charging_structure_migration() db_charging_structure_migration()
db_staffing_migration() db_staffing_migration()
db_ecprojects_migration()
db_organizations_migration()
@click.command() @click.command()
......
...@@ -68,3 +68,24 @@ def test_publisher(client, mocker, dummy_config): ...@@ -68,3 +68,24 @@ def test_publisher(client, mocker, dummy_config):
assert kifu_data[5].subcontracted_fte == 3 assert kifu_data[5].subcontracted_fte == 3
assert kifu_data[5].technical_fte == 133 assert kifu_data[5].technical_fte == 133
assert kifu_data[5].non_technical_fte == 45 assert kifu_data[5].non_technical_fte == 45
ecproject_data = session.query(model.ECProject).all()
# test a couple of random entries
surf2017 = [x for x in ecproject_data if x.nren.name == 'SURF' and x.year == 2017]
assert len(surf2017) == 1
assert surf2017[0].project == 'Asterics and Magic'
asnetam2018 = [x for x in ecproject_data if x.nren.name == 'ASNET-AM' and x.year == 2018]
assert len(asnetam2018) == 1
assert asnetam2018[0].project == 'EaPConnect'
kifu2019 = [x for x in ecproject_data if x.nren.name == 'KIFU' and x.year == 2019]
assert len(kifu2019) == 4
assert kifu2019[3].project == 'SuperHeroes for Science'
parent_data = session.query(model.ParentOrganization).all()
# test a random entry
asnet2021 = [x for x in parent_data if x.nren.name == 'ASNET-AM' and x.year == 2021]
assert len(asnet2021) == 1
assert asnet2021[0].organization\
== 'Institute for Informatics and Automation Problems of the National Academy of Sciences of Armenia'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment