From c1d4fb1745a8da662ec548b0bb1b2428b0c233bc Mon Sep 17 00:00:00 2001
From: Bjarke Madsen <bjarke@nordu.net>
Date: Mon, 10 Feb 2025 12:26:50 +0100
Subject: [PATCH] Fix HEAnet funding source data & round everything to 2
 decimal places

---
 .../legacy_publisher/survey_publisher_legacy_db.py    | 18 ++++++-----
 .../legacy_publisher/survey_publisher_legacy_excel.py | 18 +++++++----
 .../publishers/survey_publisher_legacy_excel.py       | 28 ++++++++++------
 .../publishers/survey_publisher_old_db_2022.py        | 32 +++++++++++--------
 4 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py
index 8512f37a..63b86f35 100644
--- a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py
+++ b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_db.py
@@ -216,19 +216,21 @@ def funding_sources(nren_dict):
         if nren_name not in nren_dict:
             continue
 
-        if nren_name == 'HEANET':
-            nren_info[FundingSource.OTHER] = nren_info[FundingSource.OTHER] + nren_info[FundingSource.COMMERCIAL]
+        nren = nren_dict[nren_name]
+
+        if nren.name.lower() == 'heanet':
+            nren_info[FundingSource.OTHER] = 77.78
             nren_info[FundingSource.COMMERCIAL] = 0
 
         data = {
-            'client_institutions': nren_info[FundingSource.CLIENT_INSTITUTIONS],
-            'european_funding': nren_info[FundingSource.EUROPEAN_FUNDING],
-            'commercial': nren_info[FundingSource.COMMERCIAL],
-            'other': nren_info[FundingSource.OTHER],
-            'gov_public_bodies': nren_info[FundingSource.GOV_PUBLIC_BODIES],
+            'client_institutions': round(nren_info[FundingSource.CLIENT_INSTITUTIONS], 2),
+            'european_funding': round(nren_info[FundingSource.EUROPEAN_FUNDING], 2),
+            'commercial': round(nren_info[FundingSource.COMMERCIAL], 2),
+            'other': round(nren_info[FundingSource.OTHER], 2),
+            'gov_public_bodies': round(nren_info[FundingSource.GOV_PUBLIC_BODIES], 2),
         }
 
-        yield ('income_sources', nren_dict[nren_name], nren_dict[nren_name].id, 2022, data)
+        yield ('income_sources', nren, nren.id, 2022, data)
 
 
 def charging_structure(nren_dict):
diff --git a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py
index f4350741..e4b8faa3 100644
--- a/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py
+++ b/compendium_v2/publishers/legacy_publisher/survey_publisher_legacy_excel.py
@@ -68,15 +68,21 @@ def funding(nren_dict):
         if abbrev not in nren_dict:
             continue
 
+        nren = nren_dict[helpers.map_nren(abbrev)]
+
         _data = {
-            'client_institutions': client_institution,
-            'european_funding': european_funding,
-            'commercial': commercial,
-            'other': other,
-            'gov_public_bodies': gov_public_bodies,
+            'client_institutions': round(client_institution, 2),
+            'european_funding': round(european_funding, 2),
+            'commercial': round(commercial, 2),
+            'other': round(other, 2),
+            'gov_public_bodies': round(gov_public_bodies, 2),
         }
 
-        nren = nren_dict[helpers.map_nren(abbrev)]
+        if nren.name.lower() == 'heanet':
+            if year == 2016:
+                _data['other'] = 1.0
+            if year == 2018:
+                _data['other'] = 3.0
 
         if sum(_data.values()) == 0:
             continue
diff --git a/compendium_v2/publishers/survey_publisher_legacy_excel.py b/compendium_v2/publishers/survey_publisher_legacy_excel.py
index d9bb2a60..65b35424 100644
--- a/compendium_v2/publishers/survey_publisher_legacy_excel.py
+++ b/compendium_v2/publishers/survey_publisher_legacy_excel.py
@@ -98,24 +98,32 @@ def db_funding_migration(nren_dict):
             gov_public_bodies,
             commercial, other) in data:
 
+        if abbrev not in nren_dict:
+            logger.warning(f'{abbrev} unknown. Skipping.')
+            continue
+        nren = nren_dict[abbrev]
+
+        if nren.name.lower() == 'heanet':
+            if year == 2016:
+                other = 1.0
+            if year == 2018:
+                other = 3.0
+
         _data = [client_institution, european_funding, gov_public_bodies, commercial, other]
         total = sum(_data)
         if not math.isclose(total, 100, abs_tol=0.01):
             logger.warning(f'{abbrev} funding sources for {year} do not sum to 100% ({total})')
             continue
 
-        if abbrev not in nren_dict:
-            logger.warning(f'{abbrev} unknown. Skipping.')
-            continue
         inserts.append({
-            'nren': nren_dict[abbrev],
-            'nren_id': nren_dict[abbrev].id,
+            'nren': nren,
+            'nren_id': nren.id,
             'year': year,
-            'client_institutions': client_institution,
-            'european_funding': european_funding,
-            'gov_public_bodies': gov_public_bodies,
-            'commercial': commercial,
-            'other': other
+            'client_institutions': round(client_institution, 2),
+            'european_funding': round(european_funding, 2),
+            'gov_public_bodies': round(gov_public_bodies, 2),
+            'commercial': round(commercial, 2),
+            'other': round(other, 2),
         })
     db.session.bulk_insert_mappings(presentation_models.FundingSource, inserts)
     db.session.commit()
diff --git a/compendium_v2/publishers/survey_publisher_old_db_2022.py b/compendium_v2/publishers/survey_publisher_old_db_2022.py
index c49d86ac..b3cfbe2b 100644
--- a/compendium_v2/publishers/survey_publisher_old_db_2022.py
+++ b/compendium_v2/publishers/survey_publisher_old_db_2022.py
@@ -290,29 +290,33 @@ def transfer_funding_sources(nren_dict):
             nren_info[source] = value
 
     for nren_name, nren_info in sourcedata.items():
-        total = sum(nren_info.values())
-
-        if not math.isclose(total, 100, abs_tol=0.01):
-            logger.info(f'{nren_name} funding sources do not sum to 100%. ({total})')
-            continue
 
         if nren_name not in nren_dict:
             logger.info(f'{nren_name} unknown. Skipping.')
             continue
 
-        if nren_name == 'HEANET':
-            nren_info[FundingSource.OTHER] = nren_info[FundingSource.OTHER] + nren_info[FundingSource.COMMERCIAL]
+        nren = nren_dict[nren_name]
+
+        if nren.name.lower() == 'heanet':
+            nren_info[FundingSource.OTHER] = round(nren_info[FundingSource.COMMERCIAL] + nren_info[FundingSource.OTHER], 2)
             nren_info[FundingSource.COMMERCIAL] = 0
+            nren_info[FundingSource.GOV_PUBLIC_BODIES] = 77.78
+
+        total = sum(nren_info.values())
+
+        if not math.isclose(total, 100, abs_tol=0.01):
+            logger.info(f'{nren_name} funding sources do not sum to 100%. ({total})')
+            continue
 
         funding_source = presentation_models.FundingSource(
-            nren=nren_dict[nren_name],
-            nren_id=nren_dict[nren_name].id,
+            nren=nren,
+            nren_id=nren.id,
             year=2022,
-            client_institutions=nren_info[FundingSource.CLIENT_INSTITUTIONS],
-            european_funding=nren_info[FundingSource.EUROPEAN_FUNDING],
-            gov_public_bodies=nren_info[FundingSource.GOV_PUBLIC_BODIES],
-            commercial=nren_info[FundingSource.COMMERCIAL],
-            other=nren_info[FundingSource.OTHER],
+            client_institutions=round(nren_info[FundingSource.CLIENT_INSTITUTIONS], 2),
+            european_funding=round(nren_info[FundingSource.EUROPEAN_FUNDING], 2),
+            gov_public_bodies=round(nren_info[FundingSource.GOV_PUBLIC_BODIES], 2),
+            commercial=round(nren_info[FundingSource.COMMERCIAL], 2),
+            other=round(nren_info[FundingSource.OTHER], 2),
         )
         db.session.merge(funding_source)
     db.session.commit()
-- 
GitLab