From b3856d0045069eca4a4427ddb32f5335bf27fa55 Mon Sep 17 00:00:00 2001
From: Remco Tukker <remco.tukker@geant.org>
Date: Tue, 23 May 2023 17:58:48 +0200
Subject: [PATCH] Update seven NREN names and exclude three obviously invalid
 budget datapoints

---
 .../42a826af0431_update_a_few_nren_names.py   | 52 +++++++++++++++++++
 compendium_v2/publishers/helpers.py           |  9 ++--
 .../publishers/survey_publisher_2022.py       |  4 ++
 .../publishers/survey_publisher_v1.py         |  4 ++
 test/test_survey_publisher_2022.py            |  3 +-
 test/test_survey_publisher_v1.py              |  2 +-
 6 files changed, 69 insertions(+), 5 deletions(-)
 create mode 100644 compendium_v2/migrations/versions/42a826af0431_update_a_few_nren_names.py

diff --git a/compendium_v2/migrations/versions/42a826af0431_update_a_few_nren_names.py b/compendium_v2/migrations/versions/42a826af0431_update_a_few_nren_names.py
new file mode 100644
index 00000000..87a4f248
--- /dev/null
+++ b/compendium_v2/migrations/versions/42a826af0431_update_a_few_nren_names.py
@@ -0,0 +1,52 @@
+"""update a few nren names
+
+Revision ID: 42a826af0431
+Revises: 049353edaa79
+Create Date: 2023-05-23 17:02:26.020102
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = '42a826af0431'
+down_revision = '049353edaa79'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():  # renames seven NRENs, then deletes the 2013 JISC, 2014 GARR and 2022 BREN budget rows
+    op.execute("UPDATE nren SET name='ANAS' WHERE name='AzScienceNet'")
+    op.execute("UPDATE nren SET name='GRNET' WHERE name='GRNET S.A.'")
+    op.execute("UPDATE nren SET name='Restena' WHERE name='RESTENA'")
+    op.execute("UPDATE nren SET name='University of Malta' WHERE name='UoM'")
+    op.execute("UPDATE nren SET name='DeiC' WHERE name='DeIC'")
+    op.execute("UPDATE nren SET name='CSC' WHERE name='Funet'")
+    op.execute("UPDATE nren SET name='Sikt' WHERE name='SIKT'")
+
+    # also remove some obviously invalid datapoints that may have been imported using an old publisher
+    op.execute(  # NOTE: downgrade() does not restore these deleted rows
+        "DELETE FROM budgets WHERE year = 2013 AND nren_id IN ("
+        "  SELECT id FROM nren WHERE UPPER(name) IN ('JISC')"  # case-insensitive name match
+        ")"
+    )
+    op.execute(
+        "DELETE FROM budgets WHERE year = 2014 AND nren_id IN ("
+        "  SELECT id FROM nren WHERE UPPER(name) IN ('GARR')"
+        ")"
+    )
+    op.execute(
+        "DELETE FROM budgets WHERE year = 2022 AND nren_id IN ("
+        "  SELECT id FROM nren WHERE UPPER(name) IN ('BREN')"
+        ")"
+    )
+
+
+def downgrade():  # restores the previous NREN names only; budget rows deleted by upgrade() stay deleted
+    op.execute("UPDATE nren SET name='AzScienceNet' WHERE name='ANAS'")
+    op.execute("UPDATE nren SET name='GRNET S.A.' WHERE name='GRNET'")
+    op.execute("UPDATE nren SET name='RESTENA' WHERE name='Restena'")
+    op.execute("UPDATE nren SET name='UoM' WHERE name='University of Malta'")
+    op.execute("UPDATE nren SET name='DeIC' WHERE name='DeiC'")
+    op.execute("UPDATE nren SET name='Funet' WHERE name='CSC'")
+    op.execute("UPDATE nren SET name='SIKT' WHERE name='Sikt'")
diff --git a/compendium_v2/publishers/helpers.py b/compendium_v2/publishers/helpers.py
index fb9fb40e..6f5e51b4 100644
--- a/compendium_v2/publishers/helpers.py
+++ b/compendium_v2/publishers/helpers.py
@@ -5,8 +5,7 @@ from compendium_v2.db import db, model
 
 def get_uppercase_nren_dict():
     """
-    :return: a dictionary of all known NRENs db entities keyed on the
-             uppercased name
+    :return: a dictionary of all known NRENs db entities keyed on the uppercased name
     """
     current_nrens = db.session.scalars(select(model.NREN))
     nren_dict = {nren.name.upper(): nren for nren in current_nrens}
@@ -14,8 +13,12 @@ def get_uppercase_nren_dict():
     nren_dict['ASNET'] = nren_dict['ASNET-AM']
     nren_dict['KIFU (NIIF)'] = nren_dict['KIFU']
     nren_dict['SURFNET'] = nren_dict['SURF']
-    nren_dict['UOM/RICERKANET'] = nren_dict['UOM']
+    nren_dict['UOM/RICERKANET'] = nren_dict['UNIVERSITY OF MALTA']
+    nren_dict['UOM'] = nren_dict['UNIVERSITY OF MALTA']
     nren_dict['UNINETT'] = nren_dict['SIKT']
     nren_dict['LANET'] = nren_dict['LAT']
     nren_dict['ANA'] = nren_dict['RASH']
+    nren_dict['AZSCIENCENET'] = nren_dict['ANAS']
+    nren_dict['GRNET S.A.'] = nren_dict['GRNET']
+    nren_dict['FUNET'] = nren_dict['CSC']
     return nren_dict
diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py
index 3d3cf02b..77cf39f7 100644
--- a/compendium_v2/publishers/survey_publisher_2022.py
+++ b/compendium_v2/publishers/survey_publisher_2022.py
@@ -146,6 +146,10 @@ def transfer_budget(nren_dict):
             logger.info(f'{nren_name} has no budget for 2022. Skipping. ({_budget}))')
             continue
 
+        if nren_name == 'BREN':
+            # obviously invalid 2022 budget datapoint; migration 42a826af0431 deletes any already-imported row
+            continue
+
         if budget > 200:
             logger.info(f'{nren_name} has budget set to >200M EUR for 2022. ({budget})')
 
diff --git a/compendium_v2/publishers/survey_publisher_v1.py b/compendium_v2/publishers/survey_publisher_v1.py
index cbb7a05e..147c9f7f 100644
--- a/compendium_v2/publishers/survey_publisher_v1.py
+++ b/compendium_v2/publishers/survey_publisher_v1.py
@@ -34,6 +34,10 @@ def db_budget_migration(nren_dict):
             abbrev = nren.abbreviation.upper()
             year = budget.year
 
+            if (year == 2013 and abbrev == 'JISC') or (year == 2014 and abbrev == 'GARR'):
+                # obviously invalid budget datapoints; migration 42a826af0431 deletes those already imported
+                continue
+
             if float(budget.budget) > 200:
                 logger.warning(f'Incorrect Data: {abbrev} has budget set >200M EUR for {year}. ({budget.budget})')
 
diff --git a/test/test_survey_publisher_2022.py b/test/test_survey_publisher_2022.py
index b7da773d..4b406c6a 100644
--- a/test/test_survey_publisher_2022.py
+++ b/test/test_survey_publisher_2022.py
@@ -199,7 +199,8 @@ def test_publisher(app_with_survey_db, mocker, dummy_config):
     mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_question', question_data)
     mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_question_id', question_id_data)
 
-    nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'UoM', 'ASNET-AM', 'SIKT', 'LAT', 'RASH']
+    nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'University of Malta', 'ASNET-AM',
+                  'SIKT', 'LAT', 'RASH', 'ANAS', 'GRNET', 'CSC']
     with app_with_survey_db.app_context():
         db.session.add_all([model.NREN(name=nren_name, country='country') for nren_name in nren_names])
         db.session.commit()
diff --git a/test/test_survey_publisher_v1.py b/test/test_survey_publisher_v1.py
index b08074b9..d3bd42b6 100644
--- a/test/test_survey_publisher_v1.py
+++ b/test/test_survey_publisher_v1.py
@@ -13,7 +13,7 @@ def test_publisher(app_with_survey_db, mocker, dummy_config):
     mocker.patch('compendium_v2.background_task.parse_excel_data.EXCEL_FILE', EXCEL_FILE)
 
     with app_with_survey_db.app_context():
-        nren_names = ['SURF', 'KIFU', 'UoM', 'ASNET-AM', 'SIKT', 'LAT', 'RASH']
+        nren_names = ['SURF', 'KIFU', 'University of Malta', 'ASNET-AM', 'SIKT', 'LAT', 'RASH', 'ANAS', 'GRNET', 'CSC']
         db.session.add_all([model.NREN(name=nren_name, country='country') for nren_name in nren_names])
         db.session.commit()
 
-- 
GitLab