From c1d4fb1745a8da662ec548b0bb1b2428b0c233bc Mon Sep 17 00:00:00 2001 From: Bjarke Madsen <bjarke@nordu.net> Date: Mon, 10 Feb 2025 12:26:50 +0100 Subject: [PATCH] Fix HEAnet funding source data & round everything to 2 decimal places --- .../survey_publisher_legacy_db.py | 18 ++++++----- .../survey_publisher_legacy_excel.py | 18 +++++++---- .../survey_publisher_legacy_excel.py | 28 ++++++++++------ .../survey_publisher_old_db_2022.py | 32 +++++++++++-------- 4 files changed, 58 insertions(+), 38 deletions(-) diff --git a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py index 8512f37a..63b86f35 100644 --- a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py +++ b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py @@ -216,19 +216,21 @@ def funding_sources(nren_dict): if nren_name not in nren_dict: continue - if nren_name == 'HEANET': - nren_info[FundingSource.OTHER] = nren_info[FundingSource.OTHER] + nren_info[FundingSource.COMMERCIAL] + nren = nren_dict[nren_name] + + if nren.name.lower() == 'heanet': + nren_info[FundingSource.OTHER] = 77.78 nren_info[FundingSource.COMMERCIAL] = 0 data = { - 'client_institutions': nren_info[FundingSource.CLIENT_INSTITUTIONS], - 'european_funding': nren_info[FundingSource.EUROPEAN_FUNDING], - 'commercial': nren_info[FundingSource.COMMERCIAL], - 'other': nren_info[FundingSource.OTHER], - 'gov_public_bodies': nren_info[FundingSource.GOV_PUBLIC_BODIES], + 'client_institutions': round(nren_info[FundingSource.CLIENT_INSTITUTIONS], 2), + 'european_funding': round(nren_info[FundingSource.EUROPEAN_FUNDING], 2), + 'commercial': round(nren_info[FundingSource.COMMERCIAL], 2), + 'other': round(nren_info[FundingSource.OTHER], 2), + 'gov_public_bodies': round(nren_info[FundingSource.GOV_PUBLIC_BODIES], 2), } - yield ('income_sources', nren_dict[nren_name], nren_dict[nren_name].id, 2022, data) + yield ('income_sources', nren, nren.id, 2022, data) def charging_structure(nren_dict): diff --git a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py index f4350741..e4b8faa3 100644 --- a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py +++ b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py @@ -68,15 +68,21 @@ def funding(nren_dict): if abbrev not in nren_dict: continue + nren = nren_dict[helpers.map_nren(abbrev)] + _data = { - 'client_institutions': client_institution, - 'european_funding': european_funding, - 'commercial': commercial, - 'other': other, - 'gov_public_bodies': gov_public_bodies, + 'client_institutions': round(client_institution, 2), + 'european_funding': round(european_funding, 2), + 'commercial': round(commercial, 2), + 'other': round(other, 2), + 'gov_public_bodies': round(gov_public_bodies, 2), } - nren = nren_dict[helpers.map_nren(abbrev)] + if nren.name.lower() == 'heanet': + if year == 2016: + _data['other'] = 1.0 + if year == 2018: + _data['other'] = 3.0 if sum(_data.values()) == 0: continue diff --git a/compendium_v2/publishers/survey_publisher_legacy_excel.py b/compendium_v2/publishers/survey_publisher_legacy_excel.py index d9bb2a60..65b35424 100644 --- a/compendium_v2/publishers/survey_publisher_legacy_excel.py +++ b/compendium_v2/publishers/survey_publisher_legacy_excel.py @@ -98,24 +98,32 @@ def db_funding_migration(nren_dict): gov_public_bodies, commercial, other) in data: + if abbrev not in nren_dict: + logger.warning(f'{abbrev} unknown. Skipping.') + continue + nren = nren_dict[abbrev] + + if nren.name.lower() == 'heanet': + if year == 2016: + other = 1.0 + if year == 2018: + other = 3.0 + _data = [client_institution, european_funding, gov_public_bodies, commercial, other] total = sum(_data) if not math.isclose(total, 100, abs_tol=0.01): logger.warning(f'{abbrev} funding sources for {year} do not sum to 100% ({total})') continue - if abbrev not in nren_dict: - logger.warning(f'{abbrev} unknown. Skipping.') - continue inserts.append({ - 'nren': nren_dict[abbrev], - 'nren_id': nren_dict[abbrev].id, + 'nren': nren, + 'nren_id': nren.id, 'year': year, - 'client_institutions': client_institution, - 'european_funding': european_funding, - 'gov_public_bodies': gov_public_bodies, - 'commercial': commercial, - 'other': other + 'client_institutions': round(client_institution, 2), + 'european_funding': round(european_funding, 2), + 'gov_public_bodies': round(gov_public_bodies, 2), + 'commercial': round(commercial, 2), + 'other': round(other, 2), }) db.session.bulk_insert_mappings(presentation_models.FundingSource, inserts) db.session.commit() diff --git a/compendium_v2/publishers/survey_publisher_old_db_2022.py b/compendium_v2/publishers/survey_publisher_old_db_2022.py index c49d86ac..b3cfbe2b 100644 --- a/compendium_v2/publishers/survey_publisher_old_db_2022.py +++ b/compendium_v2/publishers/survey_publisher_old_db_2022.py @@ -290,29 +290,33 @@ def transfer_funding_sources(nren_dict): nren_info[source] = value for nren_name, nren_info in sourcedata.items(): - total = sum(nren_info.values()) - - if not math.isclose(total, 100, abs_tol=0.01): - logger.info(f'{nren_name} funding sources do not sum to 100%. ({total})') - continue if nren_name not in nren_dict: logger.info(f'{nren_name} unknown. Skipping.') continue - if nren_name == 'HEANET': - nren_info[FundingSource.OTHER] = nren_info[FundingSource.OTHER] + nren_info[FundingSource.COMMERCIAL] + nren = nren_dict[nren_name] + + if nren.name.lower() == 'heanet': + nren_info[FundingSource.OTHER] = round(nren_info[FundingSource.COMMERCIAL] + nren_info[FundingSource.OTHER], 2) nren_info[FundingSource.COMMERCIAL] = 0 + nren_info[FundingSource.GOV_PUBLIC_BODIES] = 77.78 + + total = sum(nren_info.values()) + + if not math.isclose(total, 100, abs_tol=0.01): + logger.info(f'{nren_name} funding sources do not sum to 100%. ({total})') + continue funding_source = presentation_models.FundingSource( - nren=nren_dict[nren_name], - nren_id=nren_dict[nren_name].id, + nren=nren, + nren_id=nren.id, year=2022, - client_institutions=nren_info[FundingSource.CLIENT_INSTITUTIONS], - european_funding=nren_info[FundingSource.EUROPEAN_FUNDING], - gov_public_bodies=nren_info[FundingSource.GOV_PUBLIC_BODIES], - commercial=nren_info[FundingSource.COMMERCIAL], - other=nren_info[FundingSource.OTHER], + client_institutions=round(nren_info[FundingSource.CLIENT_INSTITUTIONS], 2), + european_funding=round(nren_info[FundingSource.EUROPEAN_FUNDING], 2), + gov_public_bodies=round(nren_info[FundingSource.GOV_PUBLIC_BODIES], 2), + commercial=round(nren_info[FundingSource.COMMERCIAL], 2), + other=round(nren_info[FundingSource.OTHER], 2), ) db.session.merge(funding_source) db.session.commit() -- GitLab