From 1a8ee8c701cc87c822092d637817bb4f828eec43 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Fri, 22 Sep 2023 15:27:38 +0200 Subject: [PATCH] rest of the publisher for the legacy db tables --- compendium_v2/publishers/survey_publisher.py | 17 +- .../survey_publisher_old_db_2022.py | 292 ++++++++++++++---- 2 files changed, 239 insertions(+), 70 deletions(-) diff --git a/compendium_v2/publishers/survey_publisher.py b/compendium_v2/publishers/survey_publisher.py index f4378755..c22c8967 100644 --- a/compendium_v2/publishers/survey_publisher.py +++ b/compendium_v2/publishers/survey_publisher.py @@ -356,14 +356,13 @@ def _map_2023(nren, answers) -> None: light_description=fibre_light )) - network_map_urls = answers.get("network_map_urls") - if network_map_urls: - urls = [i.get("network_map_url", "") for i in network_map_urls if i.get("network_map_url", "") != ""] - if urls: - db.session.add(NetworkMapUrls( - nren_id=nren.id, nren=nren, year=year, - urls=urls - )) + network_map_urls = answers.get("network_map_urls", []) + urls = [i.get("network_map_url", "") for i in network_map_urls if i.get("network_map_url", "") != ""] + if urls: + db.session.add(NetworkMapUrls( + nren_id=nren.id, nren=nren, year=year, + urls=urls + )) monitoring_tools = answers.get("monitoring_tools", []) netflow_vendors = answers.get("netflow_vendors", "") @@ -390,7 +389,7 @@ def _map_2023(nren, answers) -> None: traffic_statistics = answers.get("traffic_statistics") if traffic_statistics: traffic_statistics = traffic_statistics == "Yes" - urls = answers.get("traffic_statistics_urls") + urls = answers.get("traffic_statistics_urls", []) urls = [i.get("traffic_statistics_url", "") for i in urls if i.get("traffic_statistics_url")] db.session.add(TrafficStatistics( nren_id=nren.id, nren=nren, year=year, diff --git a/compendium_v2/publishers/survey_publisher_old_db_2022.py b/compendium_v2/publishers/survey_publisher_old_db_2022.py index 0a39c142..c8a59559 100644 --- a/compendium_v2/publishers/survey_publisher_old_db_2022.py +++ b/compendium_v2/publishers/survey_publisher_old_db_2022.py @@ -18,8 +18,9 @@ from sqlalchemy import delete, text from collections import defaultdict import compendium_v2 -from compendium_v2.conversion.mapping import CHARGING_LEVELS, CONNECTION, SERVICE_USER_TYPE_TO_CODE -from compendium_v2.db.presentation_model_enums import CommercialCharges, CommercialConnectivityCoverage, FeeType, ServiceCategory, UserCategory +from compendium_v2.conversion.mapping import CHARGING_LEVELS, CONNECTION, INTERCONNECTION, SERVICE_USER_TYPE_TO_CODE +from compendium_v2.db.presentation_model_enums import CommercialCharges, CommercialConnectivityCoverage, \ + ConnectionMethod, FeeType, ServiceCategory, UserCategory, YesNoPlanned from compendium_v2.environment import setup_logging from compendium_v2.config import load from compendium_v2.publishers.helpers import extract_urls @@ -170,6 +171,17 @@ def query_question_id(question_id: int, year: int = 2022): return db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]}) +def _parse_json_urls(value, nren_name): + if value and not value.startswith('['): + value = f'[{value}]' + + try: + return [url.strip().strip('/') for url in json.loads(value) if url.strip()] + except json.decoder.JSONDecodeError: + logger.info(f'JSON decode error for urls for {nren_name}.') + return [] + + def transfer_budget(nren_dict): rows = query_budget() for row in rows: @@ -203,16 +215,6 @@ def transfer_budget(nren_dict): def transfer_institutions_urls(nren_dict): - def _parse_json(value): - if value and not value.startswith('['): - value = f'[{value}]' - - try: - return [url.strip() for url in json.loads(value) if url.strip()] - except json.decoder.JSONDecodeError: - logger.info(f'JSON decode error for institution urls for {nren_name}.') - return [] - rows = recursive_query(16507) for row in rows: @@ -222,7 +224,7 @@ def transfer_institutions_urls(nren_dict): continue urls = extract_urls(text=answer) - urls_json = _parse_json(answer) + urls_json = _parse_json_urls(answer, nren_name) if urls != urls_json: logger.info(f'Institution URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}') @@ -809,128 +811,217 @@ def transfer_commercial_charging_level(nren_dict): def transfer_fibre_light(nren_dict): - rows = recursive_query() + fibre = recursive_query(16668) + fibre = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in fibre} + fibre_comment = recursive_query(16669) + fibre_comment = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in fibre_comment} - for answer_id, nren_name, year, answer in rows: + for nren_name, year in fibre.keys() | fibre_comment.keys(): if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue - new_entry = presentation_models.FibreLight( - nren=nren_dict[nren_name], - nren_id=nren_dict[nren_name].id, - year=year, - ) - db.session.merge(new_entry) + description = fibre.get((nren_name, year)) + comment = fibre_comment.get((nren_name, year)) + if description and description[0:5] != "Other": + if comment and comment.replace("-", "") != "": + logger.warning( + f'fibre light comment while description is not "Other": {description} {comment} {nren_name}.' + ) + else: + description = comment + + if description: + new_entry = presentation_models.FibreLight( + nren=nren_dict[nren_name], + nren_id=nren_dict[nren_name].id, + year=year, + light_description=description + ) + db.session.merge(new_entry) db.session.commit() def transfer_network_map_urls(nren_dict): - rows = recursive_query() + rows = recursive_query(16670) for answer_id, nren_name, year, answer in rows: if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + urls = extract_urls(text=answer) + urls_json = _parse_json_urls(answer, nren_name) + if urls != urls_json: + logger.info(f'Institution URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}') + + if not urls: + logger.info(f'{nren_name} has no urls for {year}. Skipping.') + continue + new_entry = presentation_models.NetworkMapUrls( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + urls=urls ) db.session.merge(new_entry) db.session.commit() def transfer_traffic_statistics(nren_dict): - rows = recursive_query() + stats = recursive_query(16677) + stat_urls = recursive_query(16678) + stat_urls = {(nren_name, year): answer for answer_id, nren_name, year, answer in stat_urls} - for answer_id, nren_name, year, answer in rows: + for answer_id, nren_name, year, answer in stats: if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + db_urls = stat_urls.get((nren_name, year)) + if db_urls: + urls = extract_urls(text=db_urls) + urls_json = _parse_json_urls(db_urls, nren_name) + if urls != urls_json: + logger.info( + f'Traffic stat URLs for {nren_name} do not match between json and regex. {urls} != {urls_json}' + ) + db_urls = urls + else: + db_urls = [] + new_entry = presentation_models.TrafficStatistics( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + traffic_statistics=answer == '"Yes"', + urls=db_urls ) db.session.merge(new_entry) db.session.commit() def transfer_siem_vendors(nren_dict): - rows = recursive_query() + vendors = recursive_query(16679) + vendors = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in vendors} + vendor_comment = recursive_query(16680) + vendor_comment = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in vendor_comment} - for answer_id, nren_name, year, answer in rows: + for nren_name, year in vendors.keys() | vendor_comment.keys(): if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + vendor_names = vendors.get((nren_name, year), []) + comment = vendor_comment.get((nren_name, year)) + if comment: + vendor_names.append(comment) + vendor_names.remove("Other") + new_entry = presentation_models.SiemVendors( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + vendor_names=vendor_names ) db.session.merge(new_entry) db.session.commit() def transfer_certificate_providers(nren_dict): - rows = recursive_query() + providers = recursive_query(16681) + providers = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in providers} + prov_comment = recursive_query(16682) + prov_comment = {(nren_name, year): answer.strip('"') for answer_id, nren_name, year, answer in prov_comment} - for answer_id, nren_name, year, answer in rows: + for nren_name, year in providers.keys() | prov_comment.keys(): if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + provider_names = providers.get((nren_name, year), []) + comment = prov_comment.get((nren_name, year)) + if comment: + provider_names.append(comment) + if "Other" in provider_names: + provider_names.remove("Other") + new_entry = presentation_models.CertificateProviders( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + provider_names=provider_names ) db.session.merge(new_entry) db.session.commit() def transfer_weather_map(nren_dict): - rows = recursive_query() + weather = recursive_query(16683) + urls = recursive_query(16684) + urls = {(nren_name, year): answer.strip('" ') for answer_id, nren_name, year, answer in urls} - for answer_id, nren_name, year, answer in rows: + for answer_id, nren_name, year, answer in weather: if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + url = urls.get((nren_name, year), "") + if url: + found_urls = extract_urls(text=url) + if found_urls: + url = found_urls[0] + else: + url = "" + + orig_url = urls.get((nren_name, year), "").strip("/") + if url != orig_url: + logger.info(f'Weather URL for {nren_name} do not match between json and regex. {url} != {orig_url}') + new_entry = presentation_models.WeatherMap( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + weather_map=answer == '"Yes"', + url=url ) db.session.merge(new_entry) db.session.commit() def transfer_pert_team(nren_dict): - rows = recursive_query() + rows = recursive_query(16685) for answer_id, nren_name, year, answer in rows: if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + if answer == "null": + continue + pert = YesNoPlanned[answer.strip('"').lower()] new_entry = presentation_models.PertTeam( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + pert_team=pert ) db.session.merge(new_entry) db.session.commit() def transfer_alien_wave(nren_dict): - rows = recursive_query() + alien = recursive_query(16687) + alien = { + (nren_name, year): YesNoPlanned[answer.strip('"').lower()] for answer_id, nren_name, year, answer in alien + } + nr = recursive_query(16688) + nr = {(nren_name, year): int(answer.strip('"')) for answer_id, nren_name, year, answer in nr} + internal = recursive_query(16689) + internal = {(nren_name, year): answer == '"Yes"' for answer_id, nren_name, year, answer in internal} - for answer_id, nren_name, year, answer in rows: + for nren_name, year in alien.keys() | nr.keys() | internal.keys(): if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue @@ -939,57 +1030,136 @@ def transfer_alien_wave(nren_dict): nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + alien_wave_third_pary=alien.get((nren_name, year)), + nr_of_alien_wave_third_party_services=nr.get((nren_name, year)), + alien_wave_internal=internal.get((nren_name, year)) ) db.session.merge(new_entry) db.session.commit() def transfer_external_connections(nren_dict): - rows = recursive_query() - - for answer_id, nren_name, year, answer in rows: - if nren_name not in nren_dict: - logger.warning(f'{nren_name} unknown. Skipping.') - continue + question_nrs = { + 16694: (5, "capacity"), + 16695: (7, "capacity"), + 16696: (6, "capacity"), + 16697: (7, "from_organization"), + 16698: (1, "to_organization"), + 16699: (8, "to_organization"), + 16700: (9, "to_organization"), + 16701: (1, "from_organization"), + 16702: (8, "capacity"), + 16703: (5, "to_organization"), + 16704: (0, "link_name"), + 16705: (1, "link_name"), + 16706: (9, "capacity"), + 16707: (2, "link_name"), + 16708: (0, "from_organization"), + 16709: (4, "link_name"), + 16710: (3, "link_name"), + 16711: (9, "link_name"), + 16712: (7, "link_name"), + 16713: (8, "link_name"), + 16714: (6, "link_name"), + 16715: (5, "link_name"), + 16716: (4, "from_organization"), + 16717: (5, "from_organization"), + 16718: (6, "from_organization"), + 16719: (2, "to_organization"), + 16720: (3, "to_organization"), + 16721: (4, "to_organization"), + 16722: (6, "to_organization"), + 16723: (7, "to_organization"), + 16724: (2, "interconnection_method"), + 16725: (3, "interconnection_method"), + 16726: (4, "interconnection_method"), + 16727: (5, "interconnection_method"), + 16728: (8, "from_organization"), + 16729: (9, "from_organization"), + 16730: (0, "to_organization"), + 16731: (0, "capacity"), + 16732: (1, "capacity"), + 16733: (2, "capacity"), + 16734: (3, "capacity"), + 16735: (4, "capacity"), + 16736: (3, "from_organization"), + 16737: (2, "from_organization"), + 16738: (1, "interconnection_method"), + 16739: (7, "interconnection_method"), + 16740: (8, "interconnection_method"), + 16741: (0, "interconnection_method"), + 16742: (9, "interconnection_method"), + 16743: (6, "interconnection_method") + } - new_entry = presentation_models.ExternalConnections( - nren=nren_dict[nren_name], - nren_id=nren_dict[nren_name].id, - year=year, - ) - db.session.merge(new_entry) - db.session.commit() + def empty_connection_dict(): + return {'link_name': '', 'capacity': None, 'from_organization': '', + 'to_organization': '', 'interconnection_method': None} + connection_dicts = {} + nren_year_set = set() + for question_id, (connection_nr, field) in question_nrs.items(): + rows = recursive_query(question_id) + for answer_id, nren_name, year, answer in rows: + nren_year_set.add((nren_name, year)) + conn_dict = connection_dicts.setdefault((nren_name, year, connection_nr), empty_connection_dict()) + conn_dict[field] = answer.strip('" ') -def transfer_traffic_ratio(nren_dict): - rows = recursive_query() + int_simple = {key.replace(" ", "").lower(): value for key, value in INTERCONNECTION.items()} + int_simple['openexchangepoi'] = "open_exchange" - for answer_id, nren_name, year, answer in rows: + for conn_dict in connection_dicts.values(): + if conn_dict['capacity']: + try: + conn_dict['capacity'] = str(Decimal(conn_dict['capacity'].split('G')[0].strip())) + except: # noqa: E722 + logger.warning(f'Capacity could not be converted for {nren_name}: {conn_dict["capacity"]}.') + conn_dict['capacity'] = None + if conn_dict['interconnection_method']: + int_conn = int_simple[conn_dict['interconnection_method'].replace(" ", "").lower()] + conn_dict['interconnection_method'] = ConnectionMethod[int_conn].value + + for nren_name, year in nren_year_set: if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue - new_entry = presentation_models.TrafficRatio( + connections = [] + for connection_nr in range(0, 10): + conn = connection_dicts.get((nren_name, year, connection_nr)) + if conn: + connections.append(conn) + + new_entry = presentation_models.ExternalConnections( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + connections=connections ) db.session.merge(new_entry) db.session.commit() def transfer_network_automation(nren_dict): - rows = recursive_query() + rows = recursive_query(16757) + tasks = recursive_query(16758) + tasks = {(nren_name, year): json.loads(answer) for answer_id, nren_name, year, answer in tasks} for answer_id, nren_name, year, answer in rows: if nren_name not in nren_dict: logger.warning(f'{nren_name} unknown. Skipping.') continue + network_automation = YesNoPlanned[answer.strip('"').lower()] + specifics = tasks.get((nren_name, year), []) + specifics = [s for s in specifics if s] + new_entry = presentation_models.NetworkAutomation( nren=nren_dict[nren_name], nren_id=nren_dict[nren_name].id, year=year, + network_automation=network_automation, + network_automation_specifics=specifics ) db.session.merge(new_entry) db.session.commit() @@ -998,15 +1168,15 @@ def transfer_network_automation(nren_dict): def _cli(app): with app.app_context(): nren_dict = helpers.get_uppercase_nren_dict() - # transfer_budget(nren_dict) - # transfer_funding_sources(nren_dict) - # transfer_staff_data(nren_dict) - # transfer_nren_parent_org(nren_dict) - # transfer_nren_sub_org(nren_dict) - # transfer_charging_structure(nren_dict) - # transfer_ec_projects(nren_dict) - # transfer_policies(nren_dict) - # transfer_institutions_urls(nren_dict) + transfer_budget(nren_dict) + transfer_funding_sources(nren_dict) + transfer_staff_data(nren_dict) + transfer_nren_parent_org(nren_dict) + transfer_nren_sub_org(nren_dict) + transfer_charging_structure(nren_dict) + transfer_ec_projects(nren_dict) + transfer_policies(nren_dict) + transfer_institutions_urls(nren_dict) transfer_central_procurement(nren_dict) transfer_service_management(nren_dict) @@ -1028,7 +1198,7 @@ def _cli(app): transfer_pert_team(nren_dict) transfer_alien_wave(nren_dict) transfer_external_connections(nren_dict) - transfer_traffic_ratio(nren_dict) + # traffic ratio was freeform text transfer_network_automation(nren_dict) -- GitLab