From 1a8ee8c701cc87c822092d637817bb4f828eec43 Mon Sep 17 00:00:00 2001
From: Remco Tukker <remco.tukker@geant.org>
Date: Fri, 22 Sep 2023 15:27:38 +0200
Subject: [PATCH] rest of the publisher for the legacy db tables

---
 compendium_v2/publishers/survey_publisher.py  |  17 +-
 .../survey_publisher_old_db_2022.py           | 292 ++++++++++++++----
 2 files changed, 239 insertions(+), 70 deletions(-)

diff --git a/compendium_v2/publishers/survey_publisher.py b/compendium_v2/publishers/survey_publisher.py
index f4378755..c22c8967 100644
--- a/compendium_v2/publishers/survey_publisher.py
+++ b/compendium_v2/publishers/survey_publisher.py
@@ -356,14 +356,13 @@ def _map_2023(nren, answers) -> None:
             light_description=fibre_light
         ))
 
-    network_map_urls = answers.get("network_map_urls")
-    if network_map_urls:
-        urls = [i.get("network_map_url", "") for i in network_map_urls if i.get("network_map_url", "") != ""]
-        if urls:
-            db.session.add(NetworkMapUrls(
-                nren_id=nren.id, nren=nren, year=year,
-                urls=urls
-            ))
+    network_map_urls = answers.get("network_map_urls", [])
+    urls = [i.get("network_map_url", "") for i in network_map_urls if i.get("network_map_url", "") != ""]
+    if urls:
+        db.session.add(NetworkMapUrls(
+            nren_id=nren.id, nren=nren, year=year,
+            urls=urls
+        ))
 
     monitoring_tools = answers.get("monitoring_tools", [])
     netflow_vendors = answers.get("netflow_vendors", "")
@@ -390,7 +389,7 @@ def _map_2023(nren, answers) -> None:
     traffic_statistics = answers.get("traffic_statistics")
     if traffic_statistics:
         traffic_statistics = traffic_statistics == "Yes"
-        urls = answers.get("traffic_statistics_urls")
+        urls = answers.get("traffic_statistics_urls", [])
         urls = [i.get("traffic_statistics_url", "") for i in urls if i.get("traffic_statistics_url")]
         db.session.add(TrafficStatistics(
             nren_id=nren.id, nren=nren, year=year,
diff --git a/compendium_v2/publishers/survey_publisher_old_db_2022.py b/compendium_v2/publishers/survey_publisher_old_db_2022.py
index 0a39c142..c8a59559 100644
--- a/compendium_v2/publishers/survey_publisher_old_db_2022.py
+++ b/compendium_v2/publishers/survey_publisher_old_db_2022.py
@@ -18,8 +18,9 @@ from sqlalchemy import delete, text
 from collections import defaultdict
 
 import compendium_v2
-from compendium_v2.conversion.mapping import CHARGING_LEVELS, CONNECTION, SERVICE_USER_TYPE_TO_CODE
-from compendium_v2.db.presentation_model_enums import CommercialCharges, CommercialConnectivityCoverage, FeeType, ServiceCategory, UserCategory
+from compendium_v2.conversion.mapping import CHARGING_LEVELS, CONNECTION, INTERCONNECTION, SERVICE_USER_TYPE_TO_CODE
+from compendium_v2.db.presentation_model_enums import CommercialCharges, CommercialConnectivityCoverage, \
+    ConnectionMethod, FeeType, ServiceCategory, UserCategory, YesNoPlanned
 from compendium_v2.environment import setup_logging
 from compendium_v2.config import load
 from compendium_v2.publishers.helpers import extract_urls
@@ -170,6 +171,17 @@ def query_question_id(question_id: int, year: int = 2022):
     return db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]})
 
 
+def _parse_json_urls(value, nren_name):
+    if value and not value.startswith('['):
+        value = f'[{value}]'
+
+    try:
+        return [url.strip().strip('/') for url in json.loads(value) if url.strip()]
+    except json.decoder.JSONDecodeError:
+        logger.info(f'JSON decode error for urls for {nren_name}.')
+        return []
+
+
 def transfer_budget(nren_dict):
     rows = query_budget()
     for row in rows:
@@ -203,16 +215,6 @@ def transfer_budget(nren_dict):
 
 
 def transfer_institutions_urls(nren_dict):
-    def _parse_json(value):
-        if value and not value.startswith('['):
-            value = f'[{value}]'
-
-        try:
-            return [url.strip() for url in json.loads(value) if url.strip()]
-        except json.decoder.JSONDecodeError:
-            logger.info(f'JSON decode error for institution urls for {nren_name}.')
-            return []
-
     rows = recursive_query(16507)
 
     for row in rows:
@@ -222,7 +224,7 @@ def transfer_institutions_urls(nren_dict):
             continue
 
         urls = extract_urls(text=answer)
-        urls_json = _parse_json(answer)
+        urls_json = _parse_json_urls(answer, nren_name)
         if urls != urls_json:
             logger.info(f'Institution URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}')
 
@@ -809,128 +811,217 @@ def transfer_commercial_charging_level(nren_dict):
 
 
 def transfer_fibre_light(nren_dict):
-    rows = recursive_query()
+    fibre = recursive_query(16668)
+    fibre = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in fibre}
+    fibre_comment = recursive_query(16669)
+    fibre_comment = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in fibre_comment}
 
-    for answer_id, nren_name, year, answer in rows:
+    for nren_name, year in fibre.keys() | fibre_comment.keys():
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
-        new_entry = presentation_models.FibreLight(
-            nren=nren_dict[nren_name],
-            nren_id=nren_dict[nren_name].id,
-            year=year,
-        )
-        db.session.merge(new_entry)
+        description = fibre.get((nren_name, year))
+        comment = fibre_comment.get((nren_name, year))
+        if description and description[0:5] != "Other":
+            if comment and comment.replace("-", "") != "":
+                logger.warning(
+                    f'fibre light comment while description is not "Other": {description} {comment} {nren_name}.'
+                )
+        else:
+            description = comment
+
+        if description:
+            new_entry = presentation_models.FibreLight(
+                nren=nren_dict[nren_name],
+                nren_id=nren_dict[nren_name].id,
+                year=year,
+                light_description=description
+            )
+            db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_network_map_urls(nren_dict):
-    rows = recursive_query()
+    rows = recursive_query(16670)
 
     for answer_id, nren_name, year, answer in rows:
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        urls = extract_urls(text=answer)
+        urls_json = _parse_json_urls(answer, nren_name)
+        if urls != urls_json:
+            logger.info(f'Network map URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}')
+
+        if not urls:
+            logger.info(f'{nren_name} has no urls for {year}. Skipping.')
+            continue
+
         new_entry = presentation_models.NetworkMapUrls(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            urls=urls
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_traffic_statistics(nren_dict):
-    rows = recursive_query()
+    stats = recursive_query(16677)
+    stat_urls = recursive_query(16678)
+    stat_urls = {(nren_name, year): answer for answer_id, nren_name, year, answer in stat_urls}
 
-    for answer_id, nren_name, year, answer in rows:
+    for answer_id, nren_name, year, answer in stats:
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        db_urls = stat_urls.get((nren_name, year))
+        if db_urls:
+            urls = extract_urls(text=db_urls)
+            urls_json = _parse_json_urls(db_urls, nren_name)
+            if urls != urls_json:
+                logger.info(
+                    f'Traffic stat URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}'
+                )
+            db_urls = urls
+        else:
+            db_urls = []
+
         new_entry = presentation_models.TrafficStatistics(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            traffic_statistics=answer == '"Yes"',
+            urls=db_urls
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_siem_vendors(nren_dict):
-    rows = recursive_query()
+    vendors = recursive_query(16679)
+    vendors = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in vendors}
+    vendor_comment = recursive_query(16680)
+    vendor_comment = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in vendor_comment}
 
-    for answer_id, nren_name, year, answer in rows:
+    for nren_name, year in vendors.keys() | vendor_comment.keys():
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        vendor_names = vendors.get((nren_name, year), [])
+        comment = vendor_comment.get((nren_name, year))
+        if comment:
+            vendor_names = [name for name in vendor_names if name != "Other"]  # "Other" may be absent
+            vendor_names.append(comment)
+
         new_entry = presentation_models.SiemVendors(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            vendor_names=vendor_names
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_certificate_providers(nren_dict):
-    rows = recursive_query()
+    providers = recursive_query(16681)
+    providers = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in providers}
+    prov_comment = recursive_query(16682)
+    prov_comment = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in prov_comment}
 
-    for answer_id, nren_name, year, answer in rows:
+    for nren_name, year in providers.keys() | prov_comment.keys():
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        provider_names = providers.get((nren_name, year), [])
+        comment = prov_comment.get((nren_name, year))
+        if comment:
+            provider_names.append(comment)
+            if "Other" in provider_names:
+                provider_names.remove("Other")
+
         new_entry = presentation_models.CertificateProviders(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            provider_names=provider_names
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_weather_map(nren_dict):
-    rows = recursive_query()
+    weather = recursive_query(16683)
+    urls = recursive_query(16684)
+    urls = {(nren_name, year): answer.strip('" ') for answer_id, nren_name, year, answer in urls}
 
-    for answer_id, nren_name, year, answer in rows:
+    for answer_id, nren_name, year, answer in weather:
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        url = urls.get((nren_name, year), "")
+        if url:
+            found_urls = extract_urls(text=url)
+            if found_urls:
+                url = found_urls[0]
+            else:
+                url = ""
+
+        orig_url = urls.get((nren_name, year), "").strip("/")
+        if url != orig_url:
+            logger.info(f'Weather URL for {nren_name} do not match between json and regex. {url} != {orig_url}')
+
         new_entry = presentation_models.WeatherMap(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            weather_map=answer == '"Yes"',
+            url=url
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_pert_team(nren_dict):
-    rows = recursive_query()
+    rows = recursive_query(16685)
 
     for answer_id, nren_name, year, answer in rows:
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        if answer == "null":
+            continue
+        pert = YesNoPlanned[answer.strip('"').lower()]
         new_entry = presentation_models.PertTeam(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            pert_team=pert
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_alien_wave(nren_dict):
-    rows = recursive_query()
+    alien = recursive_query(16687)
+    alien = {
+        (nren_name, year): YesNoPlanned[answer.strip('"').lower()] for answer_id, nren_name, year, answer in alien
+    }
+    nr = recursive_query(16688)
+    nr = {(nren_name, year): int(answer.strip('"')) for answer_id, nren_name, year, answer in nr}
+    internal = recursive_query(16689)
+    internal = {(nren_name, year): answer == '"Yes"' for answer_id, nren_name, year, answer in internal}
 
-    for answer_id, nren_name, year, answer in rows:
+    for nren_name, year in alien.keys() | nr.keys() | internal.keys():
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
@@ -939,57 +1030,136 @@ def transfer_alien_wave(nren_dict):
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            alien_wave_third_pary=alien.get((nren_name, year)),
+            nr_of_alien_wave_third_party_services=nr.get((nren_name, year)),
+            alien_wave_internal=internal.get((nren_name, year))
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_external_connections(nren_dict):
-    rows = recursive_query()
-
-    for answer_id, nren_name, year, answer in rows:
-        if nren_name not in nren_dict:
-            logger.warning(f'{nren_name} unknown. Skipping.')
-            continue
+    question_nrs = {
+        16694: (5, "capacity"),
+        16695: (7, "capacity"),
+        16696: (6, "capacity"),
+        16697: (7, "from_organization"),
+        16698: (1, "to_organization"),
+        16699: (8, "to_organization"),
+        16700: (9, "to_organization"),
+        16701: (1, "from_organization"),
+        16702: (8, "capacity"),
+        16703: (5, "to_organization"),
+        16704: (0, "link_name"),
+        16705: (1, "link_name"),
+        16706: (9, "capacity"),
+        16707: (2, "link_name"),
+        16708: (0, "from_organization"),
+        16709: (4, "link_name"),
+        16710: (3, "link_name"),
+        16711: (9, "link_name"),
+        16712: (7, "link_name"),
+        16713: (8, "link_name"),
+        16714: (6, "link_name"),
+        16715: (5, "link_name"),
+        16716: (4, "from_organization"),
+        16717: (5, "from_organization"),
+        16718: (6, "from_organization"),
+        16719: (2, "to_organization"),
+        16720: (3, "to_organization"),
+        16721: (4, "to_organization"),
+        16722: (6, "to_organization"),
+        16723: (7, "to_organization"),
+        16724: (2, "interconnection_method"),
+        16725: (3, "interconnection_method"),
+        16726: (4, "interconnection_method"),
+        16727: (5, "interconnection_method"),
+        16728: (8, "from_organization"),
+        16729: (9, "from_organization"),
+        16730: (0, "to_organization"),
+        16731: (0, "capacity"),
+        16732: (1, "capacity"),
+        16733: (2, "capacity"),
+        16734: (3, "capacity"),
+        16735: (4, "capacity"),
+        16736: (3, "from_organization"),
+        16737: (2, "from_organization"),
+        16738: (1, "interconnection_method"),
+        16739: (7, "interconnection_method"),
+        16740: (8, "interconnection_method"),
+        16741: (0, "interconnection_method"),
+        16742: (9, "interconnection_method"),
+        16743: (6, "interconnection_method")
+    }
 
-        new_entry = presentation_models.ExternalConnections(
-            nren=nren_dict[nren_name],
-            nren_id=nren_dict[nren_name].id,
-            year=year,
-        )
-        db.session.merge(new_entry)
-    db.session.commit()
+    def empty_connection_dict():
+        return {'link_name': '', 'capacity': None, 'from_organization': '',
+                'to_organization': '', 'interconnection_method': None}
 
+    connection_dicts = {}
+    nren_year_set = set()
+    for question_id, (connection_nr, field) in question_nrs.items():
+        rows = recursive_query(question_id)
+        for answer_id, nren_name, year, answer in rows:
+            nren_year_set.add((nren_name, year))
+            conn_dict = connection_dicts.setdefault((nren_name, year, connection_nr), empty_connection_dict())
+            conn_dict[field] = answer.strip('" ')
 
-def transfer_traffic_ratio(nren_dict):
-    rows = recursive_query()
+    int_simple = {key.replace(" ", "").lower(): value for key, value in INTERCONNECTION.items()}
+    int_simple['openexchangepoi'] = "open_exchange"
 
-    for answer_id, nren_name, year, answer in rows:
+    for (conn_nren, _year, _nr), conn_dict in connection_dicts.items():
+        if conn_dict['capacity']:
+            try:
+                conn_dict['capacity'] = str(Decimal(conn_dict['capacity'].split('G')[0].strip()))
+            except Exception:  # best-effort: unparsable capacities are logged and dropped
+                logger.warning(f'Capacity could not be converted for {conn_nren}: {conn_dict["capacity"]}.')
+                conn_dict['capacity'] = None
+        if conn_dict['interconnection_method']:
+            int_conn = int_simple[conn_dict['interconnection_method'].replace(" ", "").lower()]
+            conn_dict['interconnection_method'] = ConnectionMethod[int_conn].value
+
+    for nren_name, year in nren_year_set:
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
-        new_entry = presentation_models.TrafficRatio(
+        connections = []
+        for connection_nr in range(0, 10):
+            conn = connection_dicts.get((nren_name, year, connection_nr))
+            if conn:
+                connections.append(conn)
+
+        new_entry = presentation_models.ExternalConnections(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            connections=connections
         )
         db.session.merge(new_entry)
     db.session.commit()
 
 
 def transfer_network_automation(nren_dict):
-    rows = recursive_query()
+    rows = recursive_query(16757)
+    tasks = recursive_query(16758)
+    tasks = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in tasks}
 
     for answer_id, nren_name, year, answer in rows:
         if nren_name not in nren_dict:
             logger.warning(f'{nren_name} unknown. Skipping.')
             continue
 
+        network_automation = YesNoPlanned[answer.strip('"').lower()]
+        specifics = tasks.get((nren_name, year), [])
+        specifics = [s for s in specifics if s]
+
         new_entry = presentation_models.NetworkAutomation(
             nren=nren_dict[nren_name],
             nren_id=nren_dict[nren_name].id,
             year=year,
+            network_automation=network_automation,
+            network_automation_specifics=specifics
         )
         db.session.merge(new_entry)
     db.session.commit()
@@ -998,15 +1168,15 @@ def transfer_network_automation(nren_dict):
 def _cli(app):
     with app.app_context():
         nren_dict = helpers.get_uppercase_nren_dict()
-        # transfer_budget(nren_dict)
-        # transfer_funding_sources(nren_dict)
-        # transfer_staff_data(nren_dict)
-        # transfer_nren_parent_org(nren_dict)
-        # transfer_nren_sub_org(nren_dict)
-        # transfer_charging_structure(nren_dict)
-        # transfer_ec_projects(nren_dict)
-        # transfer_policies(nren_dict)
-        # transfer_institutions_urls(nren_dict)
+        transfer_budget(nren_dict)
+        transfer_funding_sources(nren_dict)
+        transfer_staff_data(nren_dict)
+        transfer_nren_parent_org(nren_dict)
+        transfer_nren_sub_org(nren_dict)
+        transfer_charging_structure(nren_dict)
+        transfer_ec_projects(nren_dict)
+        transfer_policies(nren_dict)
+        transfer_institutions_urls(nren_dict)
 
         transfer_central_procurement(nren_dict)
         transfer_service_management(nren_dict)
@@ -1028,7 +1198,7 @@ def _cli(app):
         transfer_pert_team(nren_dict)
         transfer_alien_wave(nren_dict)
         transfer_external_connections(nren_dict)
-        transfer_traffic_ratio(nren_dict)
+        # traffic ratio was freeform text
         transfer_network_automation(nren_dict)
 
 
-- 
GitLab