diff --git a/compendium_v2/background_task/parse_excel_data.py b/compendium_v2/background_task/parse_excel_data.py index 3e33776584e7e06a057532195c83f68fd63da94d..4148c91e547809549f7bd4d681fb3175c0822a57 100644 --- a/compendium_v2/background_task/parse_excel_data.py +++ b/compendium_v2/background_task/parse_excel_data.py @@ -39,7 +39,7 @@ def fetch_budget_excel_data(): # process the data (e.g. save to database) # print(f"NREN: {nren}, Budget: {budget}, Year: {year}") - yield nren, budget, year + yield nren.upper(), budget, year def fetch_funding_excel_data(): @@ -92,7 +92,7 @@ def fetch_funding_excel_data(): # process the data (e.g. save to database) if nren is not None: - yield (nren, year, client_institution, + yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other) @@ -148,7 +148,7 @@ def fetch_charging_structure_excel_data(): f'NREN: {nren}, Charging Structure: {charging_structure},' f' Year: {year}') - yield nren, year, charging_structure + yield nren.upper(), year, charging_structure # For 2021 yield from create_points_for_year(3, 45, 2021, 2) diff --git a/compendium_v2/publishers/helpers.py b/compendium_v2/publishers/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..4c757fc7451941e04cd4d17660560f84a46d6c56 --- /dev/null +++ b/compendium_v2/publishers/helpers.py @@ -0,0 +1,24 @@ +from compendium_v2 import db, survey_db +from compendium_v2.db import model + + +def init_db(config): + dsn_prn = config['SQLALCHEMY_DATABASE_URI'] + db.init_db_model(dsn_prn) + dsn_survey = config['SURVEY_DATABASE_URI'] + survey_db.init_db_model(dsn_survey) + + +def get_uppercase_nren_dict(session): + """ + :param session: db session that is used to query the known NRENs + :return: a dictionary of all known NRENs db entities keyed on the uppercased name + """ + current_nrens = session.query(model.NREN).all() + nren_dict = {nren.name.upper(): nren for nren in current_nrens} + # add aliases that are used in the source data: + nren_dict['ASNET'] = nren_dict['ASNET-AM'] + nren_dict['KIFU (NIIF)'] = nren_dict['KIFU'] + nren_dict['SURFNET'] = nren_dict['SURF'] + nren_dict['UOM'] = nren_dict['UOM/RICERKANET'] + return nren_dict diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py index 762260ec844060256b67f6c1c8e8b6efd878e3bc..b1ed844dbb448665ff9509c4e1a954d953685644 100644 --- a/compendium_v2/publishers/survey_publisher_2022.py +++ b/compendium_v2/publishers/survey_publisher_2022.py @@ -8,6 +8,7 @@ from compendium_v2.environment import setup_logging from compendium_v2.config import load from compendium_v2 import db, survey_db from compendium_v2.db import model +from compendium_v2.publishers import helpers setup_logging() @@ -52,13 +53,6 @@ class FundingSource(enum.Enum): GOV_PUBLIC_BODIES = 16409 -def setup_db(config): - dsn_prn = config['SQLALCHEMY_DATABASE_URI'] - db.init_db_model(dsn_prn) - dsn_survey = config['SURVEY_DATABASE_URI'] - survey_db.init_db_model(dsn_survey) - - def query_budget(): with survey_db.session_scope() as survey: return survey.execute(text(BUDGET_QUERY)) @@ -73,12 +67,11 @@ def query_funding_sources(): def transfer_budget(): with db.session_scope() as session: - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) rows = query_budget() for row in rows: - nren_name = row[0] + nren_name = row[0].upper() _budget = row[1] try: budget = float(_budget.replace('"', '').replace(',', '')) @@ -92,8 +85,8 @@ def transfer_budget(): f'{nren_name} has budget set to >200M EUR for 2022. ({budget})') if nren_name not in nren_dict: - nren_dict[nren_name] = model.NREN(name=nren_name) - session.add(nren_dict[nren_name]) + logger.info(f'{nren_name} unknown. Skipping.') + continue budget_entry = model.BudgetEntry( nren=nren_dict[nren_name], @@ -106,13 +99,12 @@ def transfer_budget(): def transfer_funding_sources(): with db.session_scope() as session: - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) sourcedata = {} for source, data in query_funding_sources(): for row in data: - nren_name = row[0] + nren_name = row[0].upper() _value = row[1] try: value = float(_value.replace('"', '').replace(',', '')) @@ -134,8 +126,8 @@ def transfer_funding_sources(): f'{nren_name} funding sources do not sum to 100%. ({total})') if nren_name not in nren_dict: - nren_dict[nren_name] = model.NREN(name=nren_name) - session.add(nren_dict[nren_name]) + logger.info(f'{nren_name} unknown. Skipping.') + continue funding_source = model.FundingSource( nren=nren_dict[nren_name], @@ -151,7 +143,7 @@ def transfer_funding_sources(): def _cli(config): - setup_db(config) + helpers.init_db(config) transfer_budget() transfer_funding_sources() diff --git a/compendium_v2/publishers/survey_publisher_v1.py b/compendium_v2/publishers/survey_publisher_v1.py index 9f5bfe1f61162788afee0baa123e3eb450b35365..073d30ac15bb373884730b386adcaa9e9c978e40 100644 --- a/compendium_v2/publishers/survey_publisher_v1.py +++ b/compendium_v2/publishers/survey_publisher_v1.py @@ -8,31 +8,24 @@ from compendium_v2.background_task import parse_excel_data from compendium_v2.config import load from compendium_v2.db import model from compendium_v2.survey_db import model as survey_model +from compendium_v2.publishers import helpers setup_logging() logger = logging.getLogger('survey-publisher-v1') -def init_db(config): - dsn_prn = config['SQLALCHEMY_DATABASE_URI'] - db.init_db_model(dsn_prn) - dsn_survey = config['SURVEY_DATABASE_URI'] - survey_db.init_db_model(dsn_survey) - - def db_budget_migration(): with survey_db.session_scope() as survey_session, \ db.session_scope() as session: - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) # move data from Survey DB budget table data = survey_session.query(survey_model.Nrens) for nren in data: for budget in nren.budgets: - abbrev = nren.abbreviation + abbrev = nren.abbreviation.upper() year = budget.year if float(budget.budget) > 200: @@ -42,8 +35,8 @@ def db_budget_migration(): f'>200M EUR for {year}. ({budget.budget})') if abbrev not in nren_dict: - nren_dict[abbrev] = model.NREN(name=abbrev) - session.add(nren_dict[abbrev]) + logger.info(f'{abbrev} unknown. Skipping.') + continue budget_entry = model.BudgetEntry( nren=nren_dict[abbrev], budget=float(budget.budget), year=year) @@ -54,8 +47,8 @@ def db_budget_migration(): for abbrev, budget, year in exceldata: if abbrev not in nren_dict: - nren_dict[abbrev] = model.NREN(name=abbrev) - session.add(nren_dict[abbrev]) + logger.info(f'{abbrev} unknown. Skipping.') + continue budget_entry = model.BudgetEntry( nren=nren_dict[abbrev], budget=budget, year=year) @@ -65,9 +58,7 @@ def db_budget_migration(): def db_funding_migration(): with db.session_scope() as session: - - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) # Import the data to database data = parse_excel_data.fetch_funding_excel_data() @@ -86,8 +77,8 @@ def db_funding_migration(): f' do not sum to 100% ({total})') if abbrev not in nren_dict: - nren_dict[abbrev] = model.NREN(name=abbrev) - session.add(nren_dict[abbrev]) + logger.info(f'{abbrev} unknown. Skipping.') + continue budget_entry = model.FundingSource( nren=nren_dict[abbrev], @@ -103,18 +94,24 @@ def db_funding_migration(): def db_charging_structure_migration(): with db.session_scope() as session: + nren_dict = helpers.get_uppercase_nren_dict(session) + # Import the data to database data = parse_excel_data.fetch_charging_structure_excel_data() - for (nren, year, charging_structure) in data: + for (abbrev, year, charging_structure) in data: + if abbrev not in nren_dict: + logger.info(f'{abbrev} unknown. Skipping.') + continue + charging_structure_entry = model.ChargingStructure( - nren=nren, year=year, fee_type=charging_structure) + nren=nren_dict[abbrev], year=year, fee_type=charging_structure) session.merge(charging_structure_entry) session.commit() def _cli(config): - init_db(config) + helpers.init_db(config) db_budget_migration() db_funding_migration() db_charging_structure_migration()