From dd96c4d71785e6205a81710d23f2bd8f6b3f9170 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Sat, 8 Apr 2023 14:08:24 +0200 Subject: [PATCH 01/11] normalization of nrens including datamigration; the publishers have not yet been fixed and are broken for now --- compendium_v2/db/model.py | 28 +++++++-- .../versions/df2536b06f35_normalize_nrens.py | 61 +++++++++++++++++++ compendium_v2/routes/budget.py | 5 +- compendium_v2/routes/funding.py | 6 +- test/conftest.py | 16 +++-- 5 files changed, 103 insertions(+), 13 deletions(-) create mode 100644 compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py diff --git a/compendium_v2/db/model.py b/compendium_v2/db/model.py index 45d4758d..c721580f 100644 --- a/compendium_v2/db/model.py +++ b/compendium_v2/db/model.py @@ -3,26 +3,44 @@ import sqlalchemy as sa from typing import Any +from sqlalchemy import MetaData from sqlalchemy.ext.declarative import declarative_base - -# from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship logger = logging.getLogger(__name__) +convention = { + "ix": "ix_%(column_0_label)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_%(constraint_name)s", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", +} + +metadata_obj = MetaData(naming_convention=convention) + # https://github.com/python/mypy/issues/2477 -base_schema: Any = declarative_base() +base_schema: Any = declarative_base(metadata=metadata_obj) + + +class NREN(base_schema): + __tablename__ = 'nrens' + id = sa.Column(sa.BigInteger, primary_key=True) + name = sa.Column(sa.String(128), nullable=False) class BudgetEntry(base_schema): __tablename__ = 'budgets' - nren = sa.Column(sa.String(128), primary_key=True) + nren_id = sa.Column(sa.BigInteger, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren = relationship(NREN) year = sa.Column(sa.Integer, primary_key=True) budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False) class FundingSource(base_schema): __tablename__ = 'funding_source' - nren = sa.Column(sa.String(128), primary_key=True) + nren_id = sa.Column(sa.BigInteger, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren = relationship(NREN) year = sa.Column(sa.Integer, primary_key=True) client_institutions = sa.Column( sa.Numeric(asdecimal=False), nullable=False) diff --git a/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py b/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py new file mode 100644 index 00000000..f5396f01 --- /dev/null +++ b/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py @@ -0,0 +1,61 @@ +"""normalize nrens + +Revision ID: df2536b06f35 +Revises: b70ada054046 +Create Date: 2023-04-08 09:00:28.451307 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'df2536b06f35' +down_revision = 'b70ada054046' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'nrens', + sa.Column('id', sa.BigInteger(), nullable=False), + sa.Column('name', sa.String(length=128), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('pk_nrens')) + ) + op.execute( + "INSERT INTO nrens (name) " + "SELECT DISTINCT nren FROM budgets UNION SELECT DISTINCT nren FROM funding_source;" + ) + + op.add_column('budgets', sa.Column('nren_id', sa.BigInteger())) + op.execute("UPDATE budgets SET nren_id = nrens.id FROM nrens WHERE budgets.nren = nrens.name") + op.alter_column('budgets', 'nren_id', nullable=False) + op.create_foreign_key(op.f('fk_budgets_nren_id_nrens'), 'budgets', 'nrens', ['nren_id'], ['id']) + op.drop_column('budgets', 'nren') + + op.add_column('funding_source', sa.Column('nren_id', sa.BigInteger())) + op.execute("UPDATE funding_source SET nren_id = nrens.id FROM nrens WHERE funding_source.nren = nrens.name") + op.alter_column('funding_source', 'nren_id', nullable=False) + op.create_foreign_key(op.f('fk_funding_source_nren_id_nrens'), 'funding_source', 'nrens', ['nren_id'], ['id']) + op.drop_column('funding_source', 'nren') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('funding_source', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) + op.execute("UPDATE funding_source SET nren = nrens.name FROM nrens WHERE funding_source.nren_id = nrens.id") + op.alter_column('funding_source', 'nren', nullable=False) + op.drop_constraint(op.f('fk_funding_source_nren_id_nrens'), 'funding_source', type_='foreignkey') + op.drop_column('funding_source', 'nren_id') + + op.add_column('budgets', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) + op.execute("UPDATE budgets SET nren = nrens.name FROM nrens WHERE budgets.nren_id = nrens.id") + op.alter_column('budgets', 'nren', nullable=False) + op.drop_constraint(op.f('fk_budgets_nren_id_nrens'), 'budgets', type_='foreignkey') + op.drop_column('budgets', 'nren_id') + + op.drop_table('nrens') + # ### end Alembic commands ### diff --git a/compendium_v2/routes/budget.py b/compendium_v2/routes/budget.py index 8900a188..42dca529 100644 --- a/compendium_v2/routes/budget.py +++ b/compendium_v2/routes/budget.py @@ -2,6 +2,7 @@ import logging from typing import Any from flask import Blueprint, jsonify, current_app +from sqlalchemy.orm import joinedload from compendium_v2 import db, survey_db from compendium_v2.db import model @@ -62,13 +63,13 @@ def budget_view() -> Any: def _extract_data(entry: model.BudgetEntry): return { - 'NREN': entry.nren, + 'NREN': entry.nren.name, 'BUDGET': float(entry.budget), 'BUDGET_YEAR': entry.year, } with db.session_scope() as session: entries = sorted([_extract_data(entry) - for entry in session.query(model.BudgetEntry)], + for entry in session.query(model.BudgetEntry).options(joinedload(model.BudgetEntry.nren))], key=lambda d: (d['BUDGET_YEAR'], d['NREN'])) return jsonify(entries) diff --git a/compendium_v2/routes/funding.py b/compendium_v2/routes/funding.py index 0c9b10d8..7d396a6a 100644 --- a/compendium_v2/routes/funding.py +++ b/compendium_v2/routes/funding.py @@ -1,6 +1,8 @@ import logging from flask import Blueprint, jsonify, current_app +from sqlalchemy.orm import joinedload + from compendium_v2 import db from compendium_v2.routes import common from compendium_v2.db import model @@ -60,7 +62,7 @@ def funding_source_view() -> Any: def _extract_data(entry: model.FundingSource): return { - 'NREN': entry.nren, + 'NREN': entry.nren.name, 'YEAR': int(entry.year), 'CLIENT_INSTITUTIONS': float(entry.client_institutions), 'EUROPEAN_FUNDING': float(entry.european_funding), @@ -71,6 +73,6 @@ def funding_source_view() -> Any: with db.session_scope() as session: entries = sorted([_extract_data(entry) - for entry in session.query(model.FundingSource)], + for entry in session.query(model.FundingSource).options(joinedload(model.FundingSource.nren))], key=lambda d: (d['NREN'], d['YEAR'])) return jsonify(entries) diff --git a/test/conftest.py b/test/conftest.py index bc36503c..2b69164c 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -71,9 +71,13 @@ def mocked_db(mocker): @pytest.fixture def test_budget_data(): with db.session_scope() as session: - data = _test_data_csv("BudgetTestData.csv") + data = [row for row in _test_data_csv("BudgetTestData.csv")] + nren_names = set([row["nren"] for row in data]) + nren_dict = {nren_name: model.NREN(id=idx+1, name=nren_name) for idx, nren_name in enumerate(nren_names)} + session.add_all(nren_dict.values()) + for row in data: - nren = row["nren"] + nren = nren_dict[row["nren"]] budget = row["budget"] year = row["year"] @@ -115,9 +119,13 @@ def test_budget_data(): @pytest.fixture def test_funding_source_data(): with db.session_scope() as session: - data = _test_data_csv("FundingSourceTestData.csv") + data = [row for row in _test_data_csv("FundingSourceTestData.csv")] + nren_names = set([row["nren"] for row in data]) + nren_dict = {nren_name: model.NREN(id=idx+1, name=nren_name) for idx, nren_name in enumerate(nren_names)} + session.add_all(nren_dict.values()) + for row in data: - nren = row["nren"] + nren = nren_dict[row["nren"]] year = row["year"] client = row["client"] european = row["european"] -- GitLab From 6ccb6b993e1f05ace36f148594f9d35053072f81 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Sat, 8 Apr 2023 21:10:54 +0200 Subject: [PATCH 02/11] use int instead of bigint for the id column to make sqlite autoincrement work --- compendium_v2/db/model.py | 6 +++--- .../migrations/versions/df2536b06f35_normalize_nrens.py | 6 +++--- test/conftest.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/compendium_v2/db/model.py b/compendium_v2/db/model.py index c721580f..4851d44e 100644 --- a/compendium_v2/db/model.py +++ b/compendium_v2/db/model.py @@ -25,13 +25,13 @@ base_schema: Any = declarative_base(metadata=metadata_obj) class NREN(base_schema): __tablename__ = 'nrens' - id = sa.Column(sa.BigInteger, primary_key=True) + id = sa.Column(sa.Integer, primary_key=True) name = sa.Column(sa.String(128), nullable=False) class BudgetEntry(base_schema): __tablename__ = 'budgets' - nren_id = sa.Column(sa.BigInteger, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN) year = sa.Column(sa.Integer, primary_key=True) budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False) @@ -39,7 +39,7 @@ class BudgetEntry(base_schema): class FundingSource(base_schema): __tablename__ = 'funding_source' - nren_id = sa.Column(sa.BigInteger, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN) year = sa.Column(sa.Integer, primary_key=True) client_institutions = sa.Column( diff --git a/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py b/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py index f5396f01..ff64045a 100644 --- a/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py +++ b/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py @@ -20,7 +20,7 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.create_table( 'nrens', - sa.Column('id', sa.BigInteger(), nullable=False), + sa.Column('id', sa.Integer(), nullable=False), sa.Column('name', sa.String(length=128), nullable=False), sa.PrimaryKeyConstraint('id', name=op.f('pk_nrens')) ) @@ -29,13 +29,13 @@ def upgrade(): "SELECT DISTINCT nren FROM budgets UNION SELECT DISTINCT nren FROM funding_source;" ) - op.add_column('budgets', sa.Column('nren_id', sa.BigInteger())) + op.add_column('budgets', sa.Column('nren_id', sa.Integer())) op.execute("UPDATE budgets SET nren_id = nrens.id FROM nrens WHERE budgets.nren = nrens.name") op.alter_column('budgets', 'nren_id', nullable=False) op.create_foreign_key(op.f('fk_budgets_nren_id_nrens'), 'budgets', 'nrens', ['nren_id'], ['id']) op.drop_column('budgets', 'nren') - op.add_column('funding_source', sa.Column('nren_id', sa.BigInteger())) + op.add_column('funding_source', sa.Column('nren_id', sa.Integer())) op.execute("UPDATE funding_source SET nren_id = nrens.id FROM nrens WHERE funding_source.nren = nrens.name") op.alter_column('funding_source', 'nren_id', nullable=False) op.create_foreign_key(op.f('fk_funding_source_nren_id_nrens'), 'funding_source', 'nrens', ['nren_id'], ['id']) diff --git a/test/conftest.py b/test/conftest.py index 2b69164c..b4df08e6 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -73,7 +73,7 @@ def test_budget_data(): with db.session_scope() as session: data = [row for row in _test_data_csv("BudgetTestData.csv")] nren_names = set([row["nren"] for row in data]) - nren_dict = {nren_name: model.NREN(id=idx+1, name=nren_name) for idx, nren_name in enumerate(nren_names)} + nren_dict = {nren_name: model.NREN(name=nren_name) for nren_name in nren_names} session.add_all(nren_dict.values()) for row in data: @@ -121,7 +121,7 @@ def test_funding_source_data(): with db.session_scope() as session: data = [row for row in _test_data_csv("FundingSourceTestData.csv")] nren_names = set([row["nren"] for row in data]) - nren_dict = {nren_name: model.NREN(id=idx+1, name=nren_name) for idx, nren_name in enumerate(nren_names)} + nren_dict = {nren_name: model.NREN(name=nren_name) for nren_name in nren_names} session.add_all(nren_dict.values()) for row in data: -- GitLab From 1e959770513d2f77bf7594b0305b70e83213a085 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Sat, 8 Apr 2023 22:27:18 +0200 Subject: [PATCH 03/11] adapt publishers to normalized nrens --- .../publishers/survey_publisher_2022.py | 65 +++++++++---------- .../publishers/survey_publisher_v1.py | 31 +++++++-- test/test_survey_publisher_2022.py | 12 ++-- tox.ini | 1 + 4 files changed, 63 insertions(+), 46 deletions(-) diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py index 37d55e81..762260ec 100644 --- a/compendium_v2/publishers/survey_publisher_2022.py +++ b/compendium_v2/publishers/survey_publisher_2022.py @@ -73,23 +73,30 @@ def query_funding_sources(): def transfer_budget(): with db.session_scope() as session: + current_nrens = session.query(model.NREN).all() + nren_dict = {nren.name: nren for nren in current_nrens} + rows = query_budget() for row in rows: - nren = row[0] + nren_name = row[0] _budget = row[1] try: budget = float(_budget.replace('"', '').replace(',', '')) except ValueError: logger.info( - f'{nren} has no budget for 2022. Skipping. ({_budget}))') + f'{nren_name} has no budget for 2022. Skipping. ({_budget}))') continue if budget > 200: logger.info( - f'{nren} has budget set to >200M EUR for 2022. ({budget})') + f'{nren_name} has budget set to >200M EUR for 2022. ({budget})') + + if nren_name not in nren_dict: + nren_dict[nren_name] = model.NREN(name=nren_name) + session.add(nren_dict[nren_name]) budget_entry = model.BudgetEntry( - nren=nren, + nren=nren_dict[nren_name], budget=budget, year=2022, ) @@ -99,55 +106,45 @@ def transfer_budget(): def transfer_funding_sources(): with db.session_scope() as session: - sources = {source.value: dict() for source in FundingSource} - nrens = set() + current_nrens = session.query(model.NREN).all() + nren_dict = {nren.name: nren for nren in current_nrens} + sourcedata = {} for source, data in query_funding_sources(): for row in data: - nren = row[0] - nrens.add(nren) + nren_name = row[0] _value = row[1] try: value = float(_value.replace('"', '').replace(',', '')) except ValueError: name = source.name logger.info( - f'{nren} has invalid value for {name}.' + f'{nren_name} has invalid value for {name}.' + f' ({_value}))') value = 0 - sources[source.value][nren] = value - - client_institutions = sources[FundingSource.CLIENT_INSTITUTIONS.value] - european_funding = sources[FundingSource.EUROPEAN_FUNDING.value] - gov_public_bodies = sources[FundingSource.GOV_PUBLIC_BODIES.value] - commercial = sources[FundingSource.COMMERCIAL.value] - other = sources[FundingSource.OTHER.value] - - _data = [client_institutions, european_funding, - gov_public_bodies, commercial, other] + nren_info = sourcedata.setdefault(nren_name, {source_type: 0 for source_type in FundingSource}) + nren_info[source] = value - for nren in nrens: - - def _get_nren(source, nren): - try: - return source[nren] - except KeyError: - return 0 - total = sum([_get_nren(source, nren) for source in _data]) + for nren_name, nren_info in sourcedata.items(): + total = sum(nren_info.values()) if not math.isclose(total, 100, abs_tol=0.01): logger.info( - f'{nren} funding sources do not sum to 100%. ({total})') + f'{nren_name} funding sources do not sum to 100%. ({total})') + + if nren_name not in nren_dict: + nren_dict[nren_name] = model.NREN(name=nren_name) + session.add(nren_dict[nren_name]) funding_source = model.FundingSource( - nren=nren, + nren=nren_dict[nren_name], year=2022, - client_institutions=client_institutions.get(nren, 0), - european_funding=european_funding.get(nren, 0), - gov_public_bodies=gov_public_bodies.get(nren, 0), - commercial=commercial.get(nren, 0), - other=other.get(nren, 0), + client_institutions=nren_info[FundingSource.CLIENT_INSTITUTIONS], + european_funding=nren_info[FundingSource.EUROPEAN_FUNDING], + gov_public_bodies=nren_info[FundingSource.GOV_PUBLIC_BODIES], + commercial=nren_info[FundingSource.COMMERCIAL], + other=nren_info[FundingSource.OTHER], ) session.merge(funding_source) session.commit() diff --git a/compendium_v2/publishers/survey_publisher_v1.py b/compendium_v2/publishers/survey_publisher_v1.py index 6c6320f8..2989e4f1 100644 --- a/compendium_v2/publishers/survey_publisher_v1.py +++ b/compendium_v2/publishers/survey_publisher_v1.py @@ -25,6 +25,9 @@ def db_budget_migration(): with survey_db.session_scope() as survey_session, \ db.session_scope() as session: + current_nrens = session.query(model.NREN).all() + nren_dict = {nren.name: nren for nren in current_nrens} + # move data from Survey DB budget table data = survey_session.query(survey_model.Nrens) for nren in data: @@ -38,16 +41,24 @@ def db_budget_migration(): f'{abbrev} has budget set to ' f'>200M EUR for {year}. ({budget.budget})') + if abbrev not in nren_dict: + nren_dict[abbrev] = model.NREN(name=abbrev) + session.add(nren_dict[abbrev]) + budget_entry = model.BudgetEntry( - nren=abbrev, budget=float(budget.budget), year=year) + nren=nren_dict[abbrev], budget=float(budget.budget), year=year) session.merge(budget_entry) # Import the data from excel sheet to database exceldata = parse_excel_data.fetch_budget_excel_data() - for nren, budget, year in exceldata: + for abbrev, budget, year in exceldata: + if abbrev not in nren_dict: + nren_dict[abbrev] = model.NREN(name=abbrev) + session.add(nren_dict[abbrev]) + budget_entry = model.BudgetEntry( - nren=nren, budget=budget, year=year) + nren=nren_dict[abbrev], budget=budget, year=year) session.merge(budget_entry) session.commit() @@ -55,10 +66,13 @@ def db_budget_migration(): def db_funding_migration(): with db.session_scope() as session: + current_nrens = session.query(model.NREN).all() + nren_dict = {nren.name: nren for nren in current_nrens} + # Import the data to database data = parse_excel_data.fetch_funding_excel_data() - for (nren, year, client_institution, + for (abbrev, year, client_institution, european_funding, gov_public_bodies, commercial, other) in data: @@ -68,11 +82,16 @@ def db_funding_migration(): total = sum(_data) if not math.isclose(total, 100, abs_tol=0.01): logger.info( - f'{nren} funding sources for {year}' + f'{abbrev} funding sources for {year}' f' do not sum to 100% ({total})') + if abbrev not in nren_dict: + nren_dict[abbrev] = model.NREN(name=abbrev) + session.add(nren_dict[abbrev]) + budget_entry = model.FundingSource( - nren=nren, year=year, + nren=nren_dict[abbrev], + year=year, client_institutions=client_institution, european_funding=european_funding, gov_public_bodies=gov_public_bodies, diff --git a/test/test_survey_publisher_2022.py b/test/test_survey_publisher_2022.py index ebfb4acf..ea3b158f 100644 --- a/test/test_survey_publisher_2022.py +++ b/test/test_survey_publisher_2022.py @@ -40,25 +40,25 @@ def test_publisher(client, mocker, dummy_config): with db.session_scope() as session: budgets = session.query(model.BudgetEntry).order_by( - model.BudgetEntry.nren.asc()).all() + model.BudgetEntry.nren_id.asc()).all() assert len(budgets) == 3 - assert budgets[0].nren == 'nren1' + assert budgets[0].nren.name == 'nren1' assert budgets[0].budget == 100 funding_sources = session.query(model.FundingSource).order_by( - model.FundingSource.nren.asc()).all() + model.FundingSource.nren_id.asc()).all() assert len(funding_sources) == 3 - assert funding_sources[0].nren == 'nren1' + assert funding_sources[0].nren.name == 'nren1' assert funding_sources[0].client_institutions == 10 assert funding_sources[0].european_funding == 50 assert funding_sources[0].other == 40 - assert funding_sources[1].nren == 'nren2' + assert funding_sources[1].nren.name == 'nren2' assert funding_sources[1].client_institutions == 80 assert funding_sources[1].european_funding == 20 assert funding_sources[1].other == 0 - assert funding_sources[2].nren == 'nren3' + assert funding_sources[2].nren.name == 'nren3' assert funding_sources[2].client_institutions == 30 assert funding_sources[2].european_funding == 30 assert funding_sources[2].other == 30 diff --git a/tox.ini b/tox.ini index de6d0a45..020e8315 100644 --- a/tox.ini +++ b/tox.ini @@ -3,6 +3,7 @@ envlist = py39 [flake8] exclude = venv,.tox,webapp +max-line-length = 120 [testenv] deps = -- GitLab From 631fede95f563dd4586679ceae8a20e24f6e5628 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Wed, 12 Apr 2023 20:25:32 +0200 Subject: [PATCH 04/11] add survey db to config-example and dont use it in budget route --- compendium_v2/routes/budget.py | 4 +--- config-example.json | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compendium_v2/routes/budget.py b/compendium_v2/routes/budget.py index 42dca529..fd1b41e2 100644 --- a/compendium_v2/routes/budget.py +++ b/compendium_v2/routes/budget.py @@ -4,7 +4,7 @@ from typing import Any from flask import Blueprint, jsonify, current_app from sqlalchemy.orm import joinedload -from compendium_v2 import db, survey_db +from compendium_v2 import db from compendium_v2.db import model from compendium_v2.routes import common @@ -16,8 +16,6 @@ def before_request(): config = current_app.config['CONFIG_PARAMS'] dsn_prn = config['SQLALCHEMY_DATABASE_URI'] db.init_db_model(dsn_prn) - dsn_survey = config['SURVEY_DATABASE_URI'] - survey_db.init_db_model(dsn_survey) logger = logging.getLogger(__name__) diff --git a/config-example.json b/config-example.json index 4f04692c..f480ed92 100644 --- a/config-example.json +++ b/config-example.json @@ -1,3 +1,4 @@ { - "SQLALCHEMY_DATABASE_URI": "postgresql://compendium:compendium321@localhost:65000/compendium" + "SQLALCHEMY_DATABASE_URI": "postgresql://compendium:compendium321@localhost:65000/compendium", + "SURVEY_DATABASE_URI": "" } -- GitLab From 8d06ada9d50a65c3fe7e654c5a728467ce47f449 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Wed, 12 Apr 2023 20:39:06 +0200 Subject: [PATCH 05/11] update readmes and remove unused default secret key --- README.md | 64 ++++++++++++--------------------------- compendium_v2/__init__.py | 1 - webapp/README.md | 2 +- 3 files changed, 21 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index e853f78f..81b1b0b6 100644 --- a/README.md +++ b/README.md @@ -2,21 +2,18 @@ ## Installation Process -```bash -$ git clone https://gitlab.geant.net/live-projects/compendium-v2.git -``` +Clone this repository using your favorite method, then: ```bash $ python3 -m venv compendium-v2 $ .compendium-v2/bin/activate -$ pip install -r requirements-dev.txt -$ cd compendium-v2 +$ pip install -r requirements.txt $ tox -e py39 ``` ## Overview -This module implements a skeleton Flask-based webservice +This module implements a Flask-based webservice and in-browser React front-end. The webservice communicates with the front end over HTTP. @@ -24,25 +21,18 @@ Responses to valid requests are returned as JSON messages. The server will therefore return an error unless `application/json` is in the `Accept` request header field. -HTTP communication and JSON grammar details are -beyond the scope of this document. -Please refer to [RFC 2616](https://tools.ietf.org/html/rfc2616) -and www.json.org for more details. - - ## Configuration This app allows specification of a few example configuration parameters. These parameters should be stored in a file formatted -similarly to `config.json.example`, and the name +similarly to `config-example.json`, and the name of this file should be stored in the environment variable `SETTINGS_FILENAME` when running the service. ## Building the web application -The initial repository doesn't contain the required web application. -For instructions on building this see `webapp/README.md`. +For instructions on building the React frontend see `webapp/README.md`. ## Running this module @@ -54,44 +44,30 @@ For example, the application could be launched as follows: ```bash $ export FLASK_APP=compendium_v2.app $ export SETTINGS_FILENAME=config-example.json -$ flask db upgrade $ flask run ``` See https://flask.palletsprojects.com/en/2.1.x/deploying/ for best practices about running in production environments. -### resources - -Any non-empty responses are JSON formatted messages. +## Importing the historical data -#### /data/version +Pip install can use the setup.py file to register the click cli commands: - * /version +```bash +pip install --editable . +survey-publisher-v1 +survey-publisher-2022 +``` - The response will be an object - containing the module and protocol versions of the - running server and will be formatted as follows: +## Creating a db migration after editing the sqlalchemy models - ```json - { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "api": { - "type": "string", - "pattern": r'\d+\.\d+' - }, - "module": { - "type": "string", - "pattern": r'\d+\.\d+' - } - }, - "required": ["api", "module"], - "additionalProperties": False - } - ``` +```bash +cd compendium_v2 +alembic revision --autogenerate -m "description" +``` -#### /test/test1 +Then go to the created migration file to make any necessary additions, for example to migrate data. +Also see https://alembic.sqlalchemy.org/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect -The response will be some json data, as an example ... +Note that starting the application applies all upgrades. diff --git a/compendium_v2/__init__.py b/compendium_v2/__init__.py index ad265a9b..33868c28 100644 --- a/compendium_v2/__init__.py +++ b/compendium_v2/__init__.py @@ -34,7 +34,6 @@ def create_app() -> Flask: app = Flask(__name__) CORS(app) - app.secret_key = 'super secret session key' app.config['CONFIG_PARAMS'] = app_config from compendium_v2.routes import default diff --git a/webapp/README.md b/webapp/README.md index 8d2ed7c3..bb42157a 100644 --- a/webapp/README.md +++ b/webapp/README.md @@ -5,7 +5,7 @@ From this folder, run: ```bash -$ npm init +$ npm install ``` To run the webpack development server: -- GitLab From 6778d93fc5ae70aea29fbcd57492ea938ab80ede Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Wed, 12 Apr 2023 21:23:42 +0200 Subject: [PATCH 06/11] start of hardcoded nren list (not working yet) --- compendium_v2/db/model.py | 9 +- .../versions/2b698bb45c09_normalize_nrens.py | 237 ++++++++++++++++++ .../versions/df2536b06f35_normalize_nrens.py | 61 ----- compendium_v2/routes/budget.py | 2 +- compendium_v2/routes/charging.py | 2 +- compendium_v2/routes/funding.py | 2 +- 6 files changed, 245 insertions(+), 68 deletions(-) create mode 100644 compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py delete mode 100644 compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py diff --git a/compendium_v2/db/model.py b/compendium_v2/db/model.py index d62e22c0..594c7dfe 100644 --- a/compendium_v2/db/model.py +++ b/compendium_v2/db/model.py @@ -25,7 +25,7 @@ base_schema: Any = declarative_base(metadata=metadata_obj) class NREN(base_schema): - __tablename__ = 'nrens' + __tablename__ = 'nren' id = sa.Column(sa.Integer, primary_key=True) name = sa.Column(sa.String(128), nullable=False) @@ -33,7 +33,7 @@ class NREN(base_schema): class BudgetEntry(base_schema): __tablename__ = 'budgets' nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) - nren = relationship(NREN) + nren = relationship(NREN, lazy='joined') year = sa.Column(sa.Integer, primary_key=True) budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False) @@ -41,7 +41,7 @@ class BudgetEntry(base_schema): class FundingSource(base_schema): __tablename__ = 'funding_source' nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) - nren = relationship(NREN) + nren = relationship(NREN, lazy='joined') year = sa.Column(sa.Integer, primary_key=True) client_institutions = sa.Column( sa.Numeric(asdecimal=False), nullable=False) @@ -61,7 +61,8 @@ class FeeType(Enum): class ChargingStructure(base_schema): __tablename__ = 'charging_structure' - nren = sa.Column(sa.String(128), primary_key=True) + nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren = relationship(NREN, lazy='joined') year = sa.Column(sa.Integer, primary_key=True) fee_type = sa.Column('fee_type', sa.Enum("flat_fee", "usage_based_fee", "combination", "no_charge", diff --git a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py new file mode 100644 index 00000000..ed816a5b --- /dev/null +++ b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py @@ -0,0 +1,237 @@ +"""normalize nrens + +Revision ID: 2b698bb45c09 +Revises: b123f21a8f4c +Create Date: 2023-04-12 20:51:05.934130 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '2b698bb45c09' +down_revision = 'b123f21a8f4c' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'nren', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(length=128), nullable=False), + sa.PrimaryKeyConstraint('id', name=op.f('pk_nren')) + ) + + op.execute( + "INSERT INTO nren (name) VALUES " + "('AAF - Australian Access Federation')," + "('AARNet')," + "('ACOnet')," + "('AfgREN')," + "('AMRES')," + "('ANA')," + "('Ankabut')," + "('Arandu')," + "('ARENA')," + "('ARN')," + "('ARNES')," + "('ASGC')," + "('ASNET-AM')," + "('AzRena')," + "('AzScienceNet')," + "('BARNet')," + "('BASNET')," + "('BdREN')," + "('Belnet')," + "('BIHARNET')," + "('BOLNET')," + "('BREN')," + "('Brunet')," + "('CameroonianNREN')," + "('CANARIE')," + "('CARNET')," + "('CEDIA')," + "('CERNET')," + "('CESNET')," + "('CNRS')," + "('CNTI')," + "('CRNet')," + "('CSTNet')," + "('CUDI')," + "('CYNET')," + "('DeIC')," + "('DFN')," + "('e-ARENA')," + "('eb@le')," + "('EENet')," + "('ErdemNet')," + "('ERNET')," + "('EtherNet')," + "('EUN')," + "('FCCN')," + "('FREEnet')," + "('Funet')," + "('GabonREN')," + "('GARNET')," + "('GARR')," + "('GCC')," + "('GRENA')," + "('GRNET S.A.')," + "('HARNET')," + "('HEAnet')," + "('INNOVA|RED')," + "('Internet2')," + "('IRANET')," + "('IRANET/IPM')," + "('iRENALA')," + "('ITB')," + "('ITC')," + "('IUCC')," + "('Jisc')," + "('JREN')," + "('JUNet')," + "('KAUST')," + "('KAZRENA')," + "('KENET')," + "('KIFU')," + "('KOREN')," + "('KREN')," + "('KRENA-AKNET')," + "('KREONET')," + "('LAT')," + "('LEARN')," + "('LERNET')," + "('LITNET')," + "('MaliREN')," + "('MAREN')," + "('MARnet')," + "('MARWAN')," + "('mmREN')," + "('MoRENet')," + "('MREN')," + "('MYREN')," + "('NCHC')," + "('ngNER')," + "('ngREN')," + "('NiCT')," + "('NigerREN')," + "('NII')," + "('NITC')," + "('NREN')," + "('OMREN')," + "('PADI2')," + "('PERN')," + "('PIONIER')," + "('PNGARNet')," + "('PREGINET')," + "('Qatar Foundation')," + "('RAAP')," + "('RADEI')," + "('RAGIE')," + "('RAICES')," + "('RAU')," + "('REACCIUN')," + "('REANNZ')," + "('RedCyT')," + "('RedIRIS')," + "('RedUNIV')," + "('RENAM')," + "('RENATA')," + "('RENATER')," + "('RENER')," + "('RENIA')," + "('RENU')," + "('RerBenin')," + "('RESTENA')," + "('REUNA')," + "('RHnet')," + "('RITER')," + "('RNERT')," + "('RNP')," + "('RNRT')," + "('RNU')," + "('RoEduNet')," + "('RUB')," + "('RwEdNet')," + "('SANET')," + "('SANReN')," + "('SAREN')," + "('SARNET')," + "('SHERN')," + "('SigmaNet')," + "('SIKT')," + "('SingAREN')," + "('snRER')," + "('Somaliren')," + "('SudREN')," + "('SUNET')," + "('SUREN')," + "('SURF')," + "('SWITCH')," + "('TARENA')," + "('TENET')," + "('TERNET')," + "('ThaiREN')," + "('TTRENT')," + "('TuRENA')," + "('UARNet')," + "('ULAKBIM')," + "('UNITEC')," + "('UNREN')," + "('UoM/RicerkaNet')," + "('URAN')," + "('UzSciNet')," + "('VinaREN')," + "('ZAMREN')" + ) + + op.add_column('budgets', sa.Column('nren_id', sa.Integer())) + op.execute( + "UPDATE budgets SET nren_id = nren.id FROM nren " + "WHERE budgets.nren = nren.name" + " OR (budgets.nren = 'bla_alias' AND nren.name = 'bla')" + # TODO etc + ) + op.alter_column('budgets', 'nren_id', nullable=False) + op.create_foreign_key(op.f('fk_budgets_nren_id_nren'), 'budgets', 'nren', ['nren_id'], ['id']) + op.drop_column('budgets', 'nren') + + op.add_column('funding_source', sa.Column('nren_id', sa.Integer())) + # TODO + op.alter_column('funding_source', 'nren_id', nullable=False) + op.create_foreign_key(op.f('fk_funding_source_nren_id_nren'), 'funding_source', 'nren', ['nren_id'], ['id']) + op.drop_column('funding_source', 'nren') + + op.add_column('charging_structure', sa.Column('nren_id', sa.Integer())) + # TODO + op.alter_column('charging_structure', 'nren_id', nullable=False) + op.create_foreign_key(op.f('fk_charging_structure_nren_id_nren'), 'charging_structure', 'nren', ['nren_id'], ['id']) + op.drop_column('charging_structure', 'nren') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('charging_structure', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) + op.execute("UPDATE charging_structure SET nren = nren.name FROM nren WHERE charging_structure.nren_id = nren.id") + op.alter_column('charging_structure', 'nren', nullable=False) + op.drop_constraint(op.f('fk_charging_structure_nren_id_nren'), 'charging_structure', type_='foreignkey') + op.drop_column('charging_structure', 'nren_id') + + op.add_column('funding_source', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) + op.execute("UPDATE funding_source SET nren = nren.name FROM nren WHERE funding_source.nren_id = nren.id") + op.alter_column('funding_source', 'nren', nullable=False) + op.drop_constraint(op.f('fk_funding_source_nren_id_nren'), 'funding_source', type_='foreignkey') + op.drop_column('funding_source', 'nren_id') + + op.add_column('budgets', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) + op.execute("UPDATE budgets SET nren = nren.name FROM nren WHERE budgets.nren_id = nren.id") + op.alter_column('budgets', 'nren', nullable=False) + op.drop_constraint(op.f('fk_budgets_nren_id_nren'), 'budgets', type_='foreignkey') + op.drop_column('budgets', 'nren_id') + + op.drop_table('nren') + # ### end Alembic commands ### diff --git a/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py b/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py deleted file mode 100644 index ff64045a..00000000 --- a/compendium_v2/migrations/versions/df2536b06f35_normalize_nrens.py +++ /dev/null @@ -1,61 +0,0 @@ -"""normalize nrens - -Revision ID: df2536b06f35 -Revises: b70ada054046 -Create Date: 2023-04-08 09:00:28.451307 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'df2536b06f35' -down_revision = 'b70ada054046' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'nrens', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('name', sa.String(length=128), nullable=False), - sa.PrimaryKeyConstraint('id', name=op.f('pk_nrens')) - ) - op.execute( - "INSERT INTO nrens (name) " - "SELECT DISTINCT nren FROM budgets UNION SELECT DISTINCT nren FROM funding_source;" - ) - - op.add_column('budgets', sa.Column('nren_id', sa.Integer())) - op.execute("UPDATE budgets SET nren_id = nrens.id FROM nrens WHERE budgets.nren = nrens.name") - op.alter_column('budgets', 'nren_id', nullable=False) - op.create_foreign_key(op.f('fk_budgets_nren_id_nrens'), 'budgets', 'nrens', ['nren_id'], ['id']) - op.drop_column('budgets', 'nren') - - op.add_column('funding_source', sa.Column('nren_id', sa.Integer())) - op.execute("UPDATE funding_source SET nren_id = nrens.id FROM nrens WHERE funding_source.nren = nrens.name") - op.alter_column('funding_source', 'nren_id', nullable=False) - op.create_foreign_key(op.f('fk_funding_source_nren_id_nrens'), 'funding_source', 'nrens', ['nren_id'], ['id']) - op.drop_column('funding_source', 'nren') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('funding_source', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) - op.execute("UPDATE funding_source SET nren = nrens.name FROM nrens WHERE funding_source.nren_id = nrens.id") - op.alter_column('funding_source', 'nren', nullable=False) - op.drop_constraint(op.f('fk_funding_source_nren_id_nrens'), 'funding_source', type_='foreignkey') - op.drop_column('funding_source', 'nren_id') - - op.add_column('budgets', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) - op.execute("UPDATE budgets SET nren = nrens.name FROM nrens WHERE budgets.nren_id = nrens.id") - op.alter_column('budgets', 'nren', nullable=False) - op.drop_constraint(op.f('fk_budgets_nren_id_nrens'), 'budgets', type_='foreignkey') - op.drop_column('budgets', 'nren_id') - - op.drop_table('nrens') - # ### end Alembic commands ### diff --git a/compendium_v2/routes/budget.py b/compendium_v2/routes/budget.py index fd1b41e2..4e3136ad 100644 --- a/compendium_v2/routes/budget.py +++ b/compendium_v2/routes/budget.py @@ -68,6 +68,6 @@ def budget_view() -> Any: with db.session_scope() as session: entries = sorted([_extract_data(entry) - for entry in session.query(model.BudgetEntry).options(joinedload(model.BudgetEntry.nren))], + for entry in session.query(model.BudgetEntry)], key=lambda d: (d['BUDGET_YEAR'], d['NREN'])) return jsonify(entries) diff --git a/compendium_v2/routes/charging.py b/compendium_v2/routes/charging.py index a0583e40..eefeb017 100644 --- a/compendium_v2/routes/charging.py +++ b/compendium_v2/routes/charging.py @@ -55,7 +55,7 @@ def charging_structure_view() -> Any: def _extract_data(entry: model.ChargingStructure): return { - 'NREN': entry.nren, + 'NREN': entry.nren.name, 'YEAR': int(entry.year), 'FEE_TYPE': entry.fee_type, } diff --git a/compendium_v2/routes/funding.py b/compendium_v2/routes/funding.py index 7d396a6a..a960153c 100644 --- a/compendium_v2/routes/funding.py +++ b/compendium_v2/routes/funding.py @@ -73,6 +73,6 @@ def funding_source_view() -> Any: with db.session_scope() as session: entries = sorted([_extract_data(entry) - for entry in session.query(model.FundingSource).options(joinedload(model.FundingSource.nren))], + for entry in session.query(model.FundingSource)], key=lambda d: (d['NREN'], d['YEAR'])) return jsonify(entries) -- GitLab From d3888e3ac714a659310dab606b8ce0e194496e09 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Wed, 12 Apr 2023 21:54:36 +0200 Subject: [PATCH 07/11] make migration script work --- .../versions/2b698bb45c09_normalize_nrens.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py index ed816a5b..5146faa0 100644 --- a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py +++ b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py @@ -25,6 +25,9 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name=op.f('pk_nren')) ) + # TODO before merge: check if this list is ok, its from COMP-118 with UNINETT and LANET added because otherwise + # I wouldn't know how to map them + # TODO also check if the mapping of names in the 3 data migrations below are OK op.execute( "INSERT INTO nren (name) VALUES " "('AAF - Australian Access Federation')," @@ -101,6 +104,7 @@ def upgrade(): "('KREN')," "('KRENA-AKNET')," "('KREONET')," + "('LANET')," "('LAT')," "('LEARN')," "('LERNET')," @@ -179,6 +183,7 @@ def upgrade(): "('TuRENA')," "('UARNet')," "('ULAKBIM')," + "('UNINETT')," "('UNITEC')," "('UNREN')," "('UoM/RicerkaNet')," @@ -191,22 +196,38 @@ def upgrade(): op.add_column('budgets', sa.Column('nren_id', sa.Integer())) op.execute( "UPDATE budgets SET nren_id = nren.id FROM nren " - "WHERE budgets.nren = nren.name" - " OR (budgets.nren = 'bla_alias' AND nren.name = 'bla')" - # TODO etc + "WHERE (UPPER(budgets.nren) = UPPER(nren.name))" + " OR (budgets.nren = 'ASNET' AND nren.name = 'ASNET-AM')" + " OR (budgets.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" + " OR (budgets.nren = 'SURFnet' AND nren.name = 'SURF')" + " OR (budgets.nren = 'UoM' AND nren.name = 'UoM/RicerkaNet')" ) op.alter_column('budgets', 'nren_id', nullable=False) op.create_foreign_key(op.f('fk_budgets_nren_id_nren'), 'budgets', 'nren', ['nren_id'], ['id']) op.drop_column('budgets', 'nren') op.add_column('funding_source', sa.Column('nren_id', sa.Integer())) - # TODO + op.execute( + "UPDATE funding_source SET nren_id = nren.id FROM nren " + "WHERE (UPPER(funding_source.nren) = UPPER(nren.name))" + " OR (funding_source.nren = 'ASNET' AND nren.name = 'ASNET-AM')" + " OR (funding_source.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" + " OR (funding_source.nren = 'SURFnet' AND nren.name = 'SURF')" + " OR (funding_source.nren = 'UoM' AND nren.name = 'UoM/RicerkaNet')" + ) op.alter_column('funding_source', 'nren_id', nullable=False) op.create_foreign_key(op.f('fk_funding_source_nren_id_nren'), 'funding_source', 'nren', ['nren_id'], ['id']) op.drop_column('funding_source', 'nren') op.add_column('charging_structure', sa.Column('nren_id', sa.Integer())) - # TODO + op.execute( + "UPDATE charging_structure SET nren_id = nren.id FROM nren " + "WHERE (UPPER(charging_structure.nren) = UPPER(nren.name))" + " OR (charging_structure.nren = 'ASNET' AND nren.name = 'ASNET-AM')" + " OR (charging_structure.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" + " OR (charging_structure.nren = 'SURFnet' AND nren.name = 'SURF')" + " OR (charging_structure.nren = 'UoM' AND nren.name = 'UoM/RicerkaNet')" + ) op.alter_column('charging_structure', 'nren_id', nullable=False) op.create_foreign_key(op.f('fk_charging_structure_nren_id_nren'), 'charging_structure', 'nren', ['nren_id'], ['id']) op.drop_column('charging_structure', 'nren') -- GitLab From af44b78b951d213fbf9b86fd50a7ea8671f72765 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Wed, 12 Apr 2023 22:45:50 +0200 Subject: [PATCH 08/11] fix publishers to use hardcoded nrens and a couple of aliases --- .../background_task/parse_excel_data.py | 6 +-- compendium_v2/publishers/helpers.py | 24 +++++++++++ .../publishers/survey_publisher_2022.py | 28 +++++-------- .../publishers/survey_publisher_v1.py | 41 +++++++++---------- 4 files changed, 56 insertions(+), 43 deletions(-) create mode 100644 compendium_v2/publishers/helpers.py diff --git a/compendium_v2/background_task/parse_excel_data.py b/compendium_v2/background_task/parse_excel_data.py index 3e337765..4148c91e 100644 --- a/compendium_v2/background_task/parse_excel_data.py +++ b/compendium_v2/background_task/parse_excel_data.py @@ -39,7 +39,7 @@ def fetch_budget_excel_data(): # process the data (e.g. save to database) # print(f"NREN: {nren}, Budget: {budget}, Year: {year}") - yield nren, budget, year + yield nren.upper(), budget, year def fetch_funding_excel_data(): @@ -92,7 +92,7 @@ def fetch_funding_excel_data(): # process the data (e.g. save to database) if nren is not None: - yield (nren, year, client_institution, + yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other) @@ -148,7 +148,7 @@ def fetch_charging_structure_excel_data(): f'NREN: {nren}, Charging Structure: {charging_structure},' f' Year: {year}') - yield nren, year, charging_structure + yield nren.upper(), year, charging_structure # For 2021 yield from create_points_for_year(3, 45, 2021, 2) diff --git a/compendium_v2/publishers/helpers.py b/compendium_v2/publishers/helpers.py new file mode 100644 index 00000000..4c757fc7 --- /dev/null +++ b/compendium_v2/publishers/helpers.py @@ -0,0 +1,24 @@ +from compendium_v2 import db, survey_db +from compendium_v2.db import model + + +def init_db(config): + dsn_prn = config['SQLALCHEMY_DATABASE_URI'] + db.init_db_model(dsn_prn) + dsn_survey = config['SURVEY_DATABASE_URI'] + survey_db.init_db_model(dsn_survey) + + +def get_uppercase_nren_dict(session): + """ + :param session: db session that is used to query the known NRENs + :return: a dictionary of all known NRENs db entities keyed on the uppercased name + """ + current_nrens = session.query(model.NREN).all() + nren_dict = {nren.name.upper(): nren for nren in current_nrens} + # add aliases that are used in the source data: + nren_dict['ASNET'] = nren_dict['ASNET-AM'] + nren_dict['KIFU (NIIF)'] = nren_dict['KIFU'] + nren_dict['SURFNET'] = nren_dict['SURF'] + nren_dict['UOM'] = nren_dict['UOM/RICERKANET'] + return nren_dict diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py index 762260ec..b1ed844d 100644 --- a/compendium_v2/publishers/survey_publisher_2022.py +++ b/compendium_v2/publishers/survey_publisher_2022.py @@ -8,6 +8,7 @@ from compendium_v2.environment import setup_logging from compendium_v2.config import load from compendium_v2 import db, survey_db from compendium_v2.db import model +from compendium_v2.publishers import helpers setup_logging() @@ -52,13 +53,6 @@ class FundingSource(enum.Enum): GOV_PUBLIC_BODIES = 16409 -def setup_db(config): - dsn_prn = config['SQLALCHEMY_DATABASE_URI'] - db.init_db_model(dsn_prn) - dsn_survey = config['SURVEY_DATABASE_URI'] - survey_db.init_db_model(dsn_survey) - - def query_budget(): with survey_db.session_scope() as survey: return survey.execute(text(BUDGET_QUERY)) @@ -73,12 +67,11 @@ def query_funding_sources(): def transfer_budget(): with db.session_scope() as session: - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) rows = query_budget() for row in rows: - nren_name = row[0] + nren_name = row[0].upper() _budget = row[1] try: budget = float(_budget.replace('"', '').replace(',', '')) @@ -92,8 +85,8 @@ def transfer_budget(): f'{nren_name} has budget set to >200M EUR for 2022. ({budget})') if nren_name not in nren_dict: - nren_dict[nren_name] = model.NREN(name=nren_name) - session.add(nren_dict[nren_name]) + logger.info(f'{nren_name} unknown. Skipping.') + continue budget_entry = model.BudgetEntry( nren=nren_dict[nren_name], @@ -106,13 +99,12 @@ def transfer_budget(): def transfer_funding_sources(): with db.session_scope() as session: - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) sourcedata = {} for source, data in query_funding_sources(): for row in data: - nren_name = row[0] + nren_name = row[0].upper() _value = row[1] try: value = float(_value.replace('"', '').replace(',', '')) @@ -134,8 +126,8 @@ def transfer_funding_sources(): f'{nren_name} funding sources do not sum to 100%. ({total})') if nren_name not in nren_dict: - nren_dict[nren_name] = model.NREN(name=nren_name) - session.add(nren_dict[nren_name]) + logger.info(f'{nren_name} unknown. Skipping.') + continue funding_source = model.FundingSource( nren=nren_dict[nren_name], @@ -151,7 +143,7 @@ def transfer_funding_sources(): def _cli(config): - setup_db(config) + helpers.init_db(config) transfer_budget() transfer_funding_sources() diff --git a/compendium_v2/publishers/survey_publisher_v1.py b/compendium_v2/publishers/survey_publisher_v1.py index 9f5bfe1f..073d30ac 100644 --- a/compendium_v2/publishers/survey_publisher_v1.py +++ b/compendium_v2/publishers/survey_publisher_v1.py @@ -8,31 +8,24 @@ from compendium_v2.background_task import parse_excel_data from compendium_v2.config import load from compendium_v2.db import model from compendium_v2.survey_db import model as survey_model +from compendium_v2.publishers import helpers setup_logging() logger = logging.getLogger('survey-publisher-v1') -def init_db(config): - dsn_prn = config['SQLALCHEMY_DATABASE_URI'] - db.init_db_model(dsn_prn) - dsn_survey = config['SURVEY_DATABASE_URI'] - survey_db.init_db_model(dsn_survey) - - def db_budget_migration(): with survey_db.session_scope() as survey_session, \ db.session_scope() as session: - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) # move data from Survey DB budget table data = survey_session.query(survey_model.Nrens) for nren in data: for budget in nren.budgets: - abbrev = nren.abbreviation + abbrev = nren.abbreviation.upper() year = budget.year if float(budget.budget) > 200: @@ -42,8 +35,8 @@ def db_budget_migration(): f'>200M EUR for {year}. ({budget.budget})') if abbrev not in nren_dict: - nren_dict[abbrev] = model.NREN(name=abbrev) - session.add(nren_dict[abbrev]) + logger.info(f'{abbrev} unknown. Skipping.') + continue budget_entry = model.BudgetEntry( nren=nren_dict[abbrev], budget=float(budget.budget), year=year) @@ -54,8 +47,8 @@ def db_budget_migration(): for abbrev, budget, year in exceldata: if abbrev not in nren_dict: - nren_dict[abbrev] = model.NREN(name=abbrev) - session.add(nren_dict[abbrev]) + logger.info(f'{abbrev} unknown. Skipping.') + continue budget_entry = model.BudgetEntry( nren=nren_dict[abbrev], budget=budget, year=year) @@ -65,9 +58,7 @@ def db_budget_migration(): def db_funding_migration(): with db.session_scope() as session: - - current_nrens = session.query(model.NREN).all() - nren_dict = {nren.name: nren for nren in current_nrens} + nren_dict = helpers.get_uppercase_nren_dict(session) # Import the data to database data = parse_excel_data.fetch_funding_excel_data() @@ -86,8 +77,8 @@ def db_funding_migration(): f' do not sum to 100% ({total})') if abbrev not in nren_dict: - nren_dict[abbrev] = model.NREN(name=abbrev) - session.add(nren_dict[abbrev]) + logger.info(f'{abbrev} unknown. Skipping.') + continue budget_entry = model.FundingSource( nren=nren_dict[abbrev], @@ -103,18 +94,24 @@ def db_funding_migration(): def db_charging_structure_migration(): with db.session_scope() as session: + nren_dict = helpers.get_uppercase_nren_dict(session) + # Import the data to database data = parse_excel_data.fetch_charging_structure_excel_data() - for (nren, year, charging_structure) in data: + for (abbrev, year, charging_structure) in data: + if abbrev not in nren_dict: + logger.info(f'{abbrev} unknown. Skipping.') + continue + charging_structure_entry = model.ChargingStructure( - nren=nren, year=year, fee_type=charging_structure) + nren=nren_dict[abbrev], year=year, fee_type=charging_structure) session.merge(charging_structure_entry) session.commit() def _cli(config): - init_db(config) + helpers.init_db(config) db_budget_migration() db_funding_migration() db_charging_structure_migration() -- GitLab From cf3cf17cc98d35e9a91ff715fc98bac1c1d65649 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Wed, 12 Apr 2023 23:35:24 +0200 Subject: [PATCH 09/11] fix testcases --- compendium_v2/routes/budget.py | 1 - compendium_v2/routes/funding.py | 1 - test/conftest.py | 8 ++++++-- test/test_survey_publisher_2022.py | 12 ++++++++---- test/test_survey_publisher_v1.py | 4 ++++ 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/compendium_v2/routes/budget.py b/compendium_v2/routes/budget.py index 4e3136ad..763b3af3 100644 --- a/compendium_v2/routes/budget.py +++ b/compendium_v2/routes/budget.py @@ -2,7 +2,6 @@ import logging from typing import Any from flask import Blueprint, jsonify, current_app -from sqlalchemy.orm import joinedload from compendium_v2 import db from compendium_v2.db import model diff --git a/compendium_v2/routes/funding.py b/compendium_v2/routes/funding.py index a960153c..0e504355 100644 --- a/compendium_v2/routes/funding.py +++ b/compendium_v2/routes/funding.py @@ -1,7 +1,6 @@ import logging from flask import Blueprint, jsonify, current_app -from sqlalchemy.orm import joinedload from compendium_v2 import db from compendium_v2.routes import common diff --git a/test/conftest.py b/test/conftest.py index b5ef278e..60c02ab3 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -162,9 +162,13 @@ def client(data_config_filename, mocked_db, mocked_survey_db): @pytest.fixture def test_charging_structure_data(): with db.session_scope() as session: - data = _test_data_csv("ChargingStructureTestData.csv") + data = [row for row in _test_data_csv("ChargingStructureTestData.csv")] + nren_names = set([row["nren"] for row in data]) + nren_dict = {nren_name: model.NREN(name=nren_name) for nren_name in nren_names} + session.add_all(nren_dict.values()) + for row in data: - nren = row["nren"] + nren = nren_dict[row["nren"]] year = row["year"] fee_type = row["fee_type"] if fee_type == "null": diff --git a/test/test_survey_publisher_2022.py b/test/test_survey_publisher_2022.py index ea3b158f..c0da2159 100644 --- a/test/test_survey_publisher_2022.py +++ b/test/test_survey_publisher_2022.py @@ -36,29 +36,33 @@ def test_publisher(client, mocker, dummy_config): mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_funding_sources', # noqa funding_source_data) + with db.session_scope() as session: + nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'UoM/RicerkaNet', 'ASNET-AM'] + session.add_all([model.NREN(name=nren_name) for nren_name in nren_names]) + _cli(dummy_config) with db.session_scope() as session: budgets = session.query(model.BudgetEntry).order_by( model.BudgetEntry.nren_id.asc()).all() assert len(budgets) == 3 - assert budgets[0].nren.name == 'nren1' + assert budgets[0].nren.name.lower() == 'nren1' assert budgets[0].budget == 100 funding_sources = session.query(model.FundingSource).order_by( model.FundingSource.nren_id.asc()).all() assert len(funding_sources) == 3 - assert funding_sources[0].nren.name == 'nren1' + assert funding_sources[0].nren.name.lower() == 'nren1' assert funding_sources[0].client_institutions == 10 assert funding_sources[0].european_funding == 50 assert funding_sources[0].other == 40 - assert funding_sources[1].nren.name == 'nren2' + assert funding_sources[1].nren.name.lower() == 'nren2' assert funding_sources[1].client_institutions == 80 assert funding_sources[1].european_funding == 20 assert funding_sources[1].other == 0 - assert funding_sources[2].nren.name == 'nren3' + assert funding_sources[2].nren.name.lower() == 'nren3' assert funding_sources[2].client_institutions == 30 assert funding_sources[2].european_funding == 30 assert funding_sources[2].other == 30 diff --git a/test/test_survey_publisher_v1.py b/test/test_survey_publisher_v1.py index 2f669937..c2c2f431 100644 --- a/test/test_survey_publisher_v1.py +++ b/test/test_survey_publisher_v1.py @@ -13,6 +13,10 @@ def test_publisher(client, mocker, dummy_config): mocker.patch('compendium_v2.background_task.parse_excel_data.EXCEL_FILE', EXCEL_FILE) + with db.session_scope() as session: + nren_names = ['SURF', 'KIFU', 'UoM/RicerkaNet', 'ASNET-AM'] + session.add_all([model.NREN(name=nren_name) for nren_name in nren_names]) + _cli(dummy_config) with db.session_scope() as session: -- GitLab From 1d23bf05ccf9261974f2ce28fdcd1ce0dce6879a Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Thu, 13 Apr 2023 15:24:48 +0200 Subject: [PATCH 10/11] linelength to 80 and updates to the correct NREN names --- compendium_v2/db/model.py | 9 +- .../versions/2b698bb45c09_normalize_nrens.py | 316 ++++++++++-------- compendium_v2/publishers/helpers.py | 8 +- .../publishers/survey_publisher_2022.py | 17 +- .../publishers/survey_publisher_v1.py | 5 +- test/conftest.py | 9 +- test/test_survey_publisher_2022.py | 8 +- test/test_survey_publisher_v1.py | 5 +- tox.ini | 2 +- 9 files changed, 221 insertions(+), 158 deletions(-) diff --git a/compendium_v2/db/model.py b/compendium_v2/db/model.py index 594c7dfe..c03955d5 100644 --- a/compendium_v2/db/model.py +++ b/compendium_v2/db/model.py @@ -32,7 +32,8 @@ class NREN(base_schema): class BudgetEntry(base_schema): __tablename__ = 'budgets' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = sa.Column( + sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') year = sa.Column(sa.Integer, primary_key=True) budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False) @@ -40,7 +41,8 @@ class BudgetEntry(base_schema): class FundingSource(base_schema): __tablename__ = 'funding_source' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = sa.Column( + sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') year = sa.Column(sa.Integer, primary_key=True) client_institutions = sa.Column( @@ -61,7 +63,8 @@ class FeeType(Enum): class ChargingStructure(base_schema): __tablename__ = 'charging_structure' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = sa.Column( + sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') year = sa.Column(sa.Integer, primary_key=True) fee_type = sa.Column('fee_type', sa.Enum("flat_fee", "usage_based_fee", diff --git a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py index 5146faa0..e2be1554 100644 --- a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py +++ b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py @@ -25,172 +25,167 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name=op.f('pk_nren')) ) - # TODO before merge: check if this list is ok, its from COMP-118 with UNINETT and LANET added because otherwise - # I wouldn't know how to map them - # TODO also check if the mapping of names in the 3 data migrations below are OK op.execute( "INSERT INTO nren (name) VALUES " - "('AAF - Australian Access Federation')," - "('AARNet')," + # "('AAF - Australian Access Federation')," + # "('AARNet')," "('ACOnet')," - "('AfgREN')," + # "('AfgREN')," "('AMRES')," - "('ANA')," - "('Ankabut')," - "('Arandu')," + # "('Ankabut')," + # "('Arandu')," "('ARENA')," - "('ARN')," + # "('ARN')," "('ARNES')," - "('ASGC')," + # "('ASGC')," "('ASNET-AM')," - "('AzRena')," + # "('AzRena')," "('AzScienceNet')," - "('BARNet')," + # "('BARNet')," "('BASNET')," - "('BdREN')," + # "('BdREN')," "('Belnet')," "('BIHARNET')," - "('BOLNET')," + # "('BOLNET')," "('BREN')," - "('Brunet')," - "('CameroonianNREN')," - "('CANARIE')," + # "('Brunet')," + # "('CameroonianNREN')," + # "('CANARIE')," "('CARNET')," - "('CEDIA')," - "('CERNET')," + # "('CEDIA')," + # "('CERNET')," "('CESNET')," - "('CNRS')," - "('CNTI')," - "('CRNet')," - "('CSTNet')," - "('CUDI')," + # "('CNRS')," + # "('CNTI')," + # "('CRNet')," + # "('CSTNet')," + # "('CUDI')," "('CYNET')," "('DeIC')," "('DFN')," - "('e-ARENA')," - "('eb@le')," + # "('e-ARENA')," + # "('eb@le')," "('EENet')," - "('ErdemNet')," - "('ERNET')," - "('EtherNet')," - "('EUN')," + # "('ErdemNet')," + # "('ERNET')," + # "('EtherNet')," + # "('EUN')," "('FCCN')," - "('FREEnet')," + # "('FREEnet')," "('Funet')," - "('GabonREN')," - "('GARNET')," + # "('GabonREN')," + # "('GARNET')," "('GARR')," - "('GCC')," + # "('GCC')," "('GRENA')," "('GRNET S.A.')," - "('HARNET')," + # "('HARNET')," "('HEAnet')," - "('INNOVA|RED')," - "('Internet2')," - "('IRANET')," - "('IRANET/IPM')," - "('iRENALA')," - "('ITB')," - "('ITC')," + # "('INNOVA|RED')," + # "('Internet2')," + # "('IRANET')," + # "('IRANET/IPM')," + # "('iRENALA')," + # "('ITB')," + # "('ITC')," "('IUCC')," "('Jisc')," - "('JREN')," - "('JUNet')," - "('KAUST')," - "('KAZRENA')," - "('KENET')," + # "('JREN')," + # "('JUNet')," + # "('KAUST')," + # "('KAZRENA')," + # "('KENET')," "('KIFU')," - "('KOREN')," - "('KREN')," - "('KRENA-AKNET')," - "('KREONET')," - "('LANET')," + # "('KOREN')," + # "('KREN')," + # "('KRENA-AKNET')," + # "('KREONET')," "('LAT')," - "('LEARN')," - "('LERNET')," + # "('LEARN')," + # "('LERNET')," "('LITNET')," - "('MaliREN')," - "('MAREN')," + # "('MaliREN')," + # "('MAREN')," "('MARnet')," - "('MARWAN')," - "('mmREN')," - "('MoRENet')," + # "('MARWAN')," + # "('mmREN')," + # "('MoRENet')," "('MREN')," - "('MYREN')," - "('NCHC')," - "('ngNER')," - "('ngREN')," - "('NiCT')," - "('NigerREN')," - "('NII')," - "('NITC')," - "('NREN')," - "('OMREN')," - "('PADI2')," - "('PERN')," + # "('MYREN')," + # "('NCHC')," + # "('ngNER')," + # "('ngREN')," + # "('NiCT')," + # "('NigerREN')," + # "('NII')," + # "('NITC')," + # "('NREN')," + # "('OMREN')," + # "('PADI2')," + # "('PERN')," "('PIONIER')," - "('PNGARNet')," - "('PREGINET')," - "('Qatar Foundation')," - "('RAAP')," - "('RADEI')," - "('RAGIE')," - "('RAICES')," - "('RAU')," - "('REACCIUN')," - "('REANNZ')," - "('RedCyT')," + # "('PNGARNet')," + # "('PREGINET')," + # "('Qatar Foundation')," + # "('RAAP')," + # "('RADEI')," + # "('RAGIE')," + # "('RAICES')," + "('RASH')," + # "('RAU')," + # "('REACCIUN')," + # "('REANNZ')," + # "('RedCyT')," "('RedIRIS')," - "('RedUNIV')," + # "('RedUNIV')," "('RENAM')," - "('RENATA')," + # "('RENATA')," "('RENATER')," - "('RENER')," - "('RENIA')," - "('RENU')," - "('RerBenin')," + # "('RENER')," + # "('RENIA')," + # "('RENU')," + # "('RerBenin')," "('RESTENA')," - "('REUNA')," + # "('REUNA')," "('RHnet')," - "('RITER')," - "('RNERT')," - "('RNP')," - "('RNRT')," - "('RNU')," + # "('RITER')," + # "('RNERT')," + # "('RNP')," + # "('RNRT')," + # "('RNU')," "('RoEduNet')," - "('RUB')," - "('RwEdNet')," + # "('RUB')," + # "('RwEdNet')," "('SANET')," - "('SANReN')," - "('SAREN')," + # "('SANReN')," + # "('SAREN')," "('SARNET')," - "('SHERN')," + # "('SHERN')," "('SigmaNet')," "('SIKT')," - "('SingAREN')," - "('snRER')," - "('Somaliren')," - "('SudREN')," + # "('SingAREN')," + # "('snRER')," + # "('Somaliren')," + # "('SudREN')," "('SUNET')," - "('SUREN')," + # "('SUREN')," "('SURF')," "('SWITCH')," - "('TARENA')," - "('TENET')," - "('TERNET')," - "('ThaiREN')," - "('TTRENT')," - "('TuRENA')," + # "('TARENA')," + # "('TENET')," + # "('TERNET')," + # "('ThaiREN')," + # "('TTRENT')," + # "('TuRENA')," "('UARNet')," "('ULAKBIM')," - "('UNINETT')," - "('UNITEC')," + # "('UNITEC')," "('UNREN')," - "('UoM/RicerkaNet')," - "('URAN')," - "('UzSciNet')," - "('VinaREN')," - "('ZAMREN')" + "('UoM')," + "('URAN')" + # "('UzSciNet')," + # "('VinaREN')," + # "('ZAMREN')" ) op.add_column('budgets', sa.Column('nren_id', sa.Integer())) @@ -200,58 +195,101 @@ def upgrade(): " OR (budgets.nren = 'ASNET' AND nren.name = 'ASNET-AM')" " OR (budgets.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" " OR (budgets.nren = 'SURFnet' AND nren.name = 'SURF')" - " OR (budgets.nren = 'UoM' AND nren.name = 'UoM/RicerkaNet')" + " OR (budgets.nren = 'UoM/RicerkaNet' AND nren.name = 'UoM')" + " OR (budgets.nren = 'UNINETT' AND nren.name = 'SIKT')" + " OR (budgets.nren = 'LANET' AND nren.name = 'LAT')" + " OR (budgets.nren = 'ANA' AND nren.name = 'RASH')" ) op.alter_column('budgets', 'nren_id', nullable=False) - op.create_foreign_key(op.f('fk_budgets_nren_id_nren'), 'budgets', 'nren', ['nren_id'], ['id']) + op.create_foreign_key( + op.f('fk_budgets_nren_id_nren'), + 'budgets', 'nren', ['nren_id'], ['id']) op.drop_column('budgets', 'nren') op.add_column('funding_source', sa.Column('nren_id', sa.Integer())) op.execute( "UPDATE funding_source SET nren_id = nren.id FROM nren " "WHERE (UPPER(funding_source.nren) = UPPER(nren.name))" - " OR (funding_source.nren = 'ASNET' AND nren.name = 'ASNET-AM')" - " OR (funding_source.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" - " OR (funding_source.nren = 'SURFnet' AND nren.name = 'SURF')" - " OR (funding_source.nren = 'UoM' AND nren.name = 'UoM/RicerkaNet')" + "OR (funding_source.nren = 'ASNET' AND nren.name = 'ASNET-AM')" + "OR (funding_source.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" + "OR (funding_source.nren = 'SURFnet' AND nren.name = 'SURF')" + "OR (funding_source.nren = 'UoM/RicerkaNet' AND nren.name = 'UoM')" + "OR (funding_source.nren = 'UNINETT' AND nren.name = 'SIKT')" + "OR (funding_source.nren = 'LANET' AND nren.name = 'LAT')" + "OR (funding_source.nren = 'ANA' AND nren.name = 'RASH')" ) op.alter_column('funding_source', 'nren_id', nullable=False) - op.create_foreign_key(op.f('fk_funding_source_nren_id_nren'), 'funding_source', 'nren', ['nren_id'], ['id']) + op.create_foreign_key( + op.f('fk_funding_source_nren_id_nren'), + 'funding_source', 'nren', ['nren_id'], ['id'] + ) op.drop_column('funding_source', 'nren') op.add_column('charging_structure', sa.Column('nren_id', sa.Integer())) op.execute( "UPDATE charging_structure SET nren_id = nren.id FROM nren " "WHERE (UPPER(charging_structure.nren) = UPPER(nren.name))" - " OR (charging_structure.nren = 'ASNET' AND nren.name = 'ASNET-AM')" - " OR (charging_structure.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" - " OR (charging_structure.nren = 'SURFnet' AND nren.name = 'SURF')" - " OR (charging_structure.nren = 'UoM' AND nren.name = 'UoM/RicerkaNet')" + "OR (charging_structure.nren = 'ASNET' AND nren.name = 'ASNET-AM')" + "OR (charging_structure.nren = 'KIFU (NIIF)' AND nren.name = 'KIFU')" + "OR (charging_structure.nren = 'SURFnet' AND nren.name = 'SURF')" + "OR (charging_structure.nren = 'UoM/RicerkaNet' AND nren.name = 'UoM')" + "OR (charging_structure.nren = 'UNINETT' AND nren.name = 'SIKT')" + "OR (charging_structure.nren = 'LANET' AND nren.name = 'LAT')" + "OR (charging_structure.nren = 'ANA' AND nren.name = 'RASH')" ) op.alter_column('charging_structure', 'nren_id', nullable=False) - op.create_foreign_key(op.f('fk_charging_structure_nren_id_nren'), 'charging_structure', 'nren', ['nren_id'], ['id']) + op.create_foreign_key( + op.f('fk_charging_structure_nren_id_nren'), + 'charging_structure', 'nren', ['nren_id'], ['id']) op.drop_column('charging_structure', 'nren') # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.add_column('charging_structure', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) - op.execute("UPDATE charging_structure SET nren = nren.name FROM nren WHERE charging_structure.nren_id = nren.id") + op.add_column( + 'charging_structure', + sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False) + ) + op.execute( + "UPDATE charging_structure SET nren = nren.name FROM nren" + " WHERE charging_structure.nren_id = nren.id" + ) op.alter_column('charging_structure', 'nren', nullable=False) - op.drop_constraint(op.f('fk_charging_structure_nren_id_nren'), 'charging_structure', type_='foreignkey') + op.drop_constraint( + op.f('fk_charging_structure_nren_id_nren'), + 'charging_structure', + type_='foreignkey' + ) op.drop_column('charging_structure', 'nren_id') - op.add_column('funding_source', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) - op.execute("UPDATE funding_source SET nren = nren.name FROM nren WHERE funding_source.nren_id = nren.id") + op.add_column( + 'funding_source', + sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False) + ) + op.execute( + "UPDATE funding_source SET nren = nren.name FROM nren" + " WHERE funding_source.nren_id = nren.id" + ) op.alter_column('funding_source', 'nren', nullable=False) - op.drop_constraint(op.f('fk_funding_source_nren_id_nren'), 'funding_source', type_='foreignkey') + op.drop_constraint( + op.f('fk_funding_source_nren_id_nren'), + 'funding_source', + type_='foreignkey' + ) op.drop_column('funding_source', 'nren_id') - op.add_column('budgets', sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False)) - op.execute("UPDATE budgets SET nren = nren.name FROM nren WHERE budgets.nren_id = nren.id") + op.add_column( + 'budgets', + sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False) + ) + op.execute( + "UPDATE budgets SET nren = nren.name FROM nren" + " WHERE budgets.nren_id = nren.id" + ) op.alter_column('budgets', 'nren', nullable=False) - op.drop_constraint(op.f('fk_budgets_nren_id_nren'), 'budgets', type_='foreignkey') + op.drop_constraint( + op.f('fk_budgets_nren_id_nren'), 'budgets', type_='foreignkey') op.drop_column('budgets', 'nren_id') op.drop_table('nren') diff --git a/compendium_v2/publishers/helpers.py b/compendium_v2/publishers/helpers.py index 4c757fc7..d95ad4e1 100644 --- a/compendium_v2/publishers/helpers.py +++ b/compendium_v2/publishers/helpers.py @@ -12,7 +12,8 @@ def init_db(config): def get_uppercase_nren_dict(session): """ :param session: db session that is used to query the known NRENs - :return: a dictionary of all known NRENs db entities keyed on the uppercased name + :return: a dictionary of all known NRENs db entities keyed on the + uppercased name """ current_nrens = session.query(model.NREN).all() nren_dict = {nren.name.upper(): nren for nren in current_nrens} @@ -20,5 +21,8 @@ def get_uppercase_nren_dict(session): nren_dict['ASNET'] = nren_dict['ASNET-AM'] nren_dict['KIFU (NIIF)'] = nren_dict['KIFU'] nren_dict['SURFNET'] = nren_dict['SURF'] - nren_dict['UOM'] = nren_dict['UOM/RICERKANET'] + nren_dict['UOM/RICERKANET'] = nren_dict['UOM'] + nren_dict['UNINETT'] = nren_dict['SIKT'] + nren_dict['LANET'] = nren_dict['LAT'] + nren_dict['ANA'] = nren_dict['RASH'] return nren_dict diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py index b1ed844d..8a532058 100644 --- a/compendium_v2/publishers/survey_publisher_2022.py +++ b/compendium_v2/publishers/survey_publisher_2022.py @@ -77,12 +77,14 @@ def transfer_budget(): budget = float(_budget.replace('"', '').replace(',', '')) except ValueError: logger.info( - f'{nren_name} has no budget for 2022. Skipping. ({_budget}))') + f'{nren_name} has no budget for 2022. Skipping.' + f' ({_budget}))') continue if budget > 200: logger.info( - f'{nren_name} has budget set to >200M EUR for 2022. ({budget})') + f'{nren_name} has budget set to >200M EUR for 2022.' + f' ({budget})') if nren_name not in nren_dict: logger.info(f'{nren_name} unknown. Skipping.') @@ -115,7 +117,10 @@ def transfer_funding_sources(): + f' ({_value}))') value = 0 - nren_info = sourcedata.setdefault(nren_name, {source_type: 0 for source_type in FundingSource}) + nren_info = sourcedata.setdefault( + nren_name, + {source_type: 0 for source_type in FundingSource} + ) nren_info[source] = value for nren_name, nren_info in sourcedata.items(): @@ -123,7 +128,8 @@ def transfer_funding_sources(): if not math.isclose(total, 100, abs_tol=0.01): logger.info( - f'{nren_name} funding sources do not sum to 100%. ({total})') + f'{nren_name} funding sources do not sum to 100%.' + f' ({total})') if nren_name not in nren_dict: logger.info(f'{nren_name} unknown. Skipping.') @@ -132,7 +138,8 @@ def transfer_funding_sources(): funding_source = model.FundingSource( nren=nren_dict[nren_name], year=2022, - client_institutions=nren_info[FundingSource.CLIENT_INSTITUTIONS], + client_institutions=nren_info[ + FundingSource.CLIENT_INSTITUTIONS], european_funding=nren_info[FundingSource.EUROPEAN_FUNDING], gov_public_bodies=nren_info[FundingSource.GOV_PUBLIC_BODIES], commercial=nren_info[FundingSource.COMMERCIAL], diff --git a/compendium_v2/publishers/survey_publisher_v1.py b/compendium_v2/publishers/survey_publisher_v1.py index 073d30ac..297ec047 100644 --- a/compendium_v2/publishers/survey_publisher_v1.py +++ b/compendium_v2/publishers/survey_publisher_v1.py @@ -39,7 +39,10 @@ def db_budget_migration(): continue budget_entry = model.BudgetEntry( - nren=nren_dict[abbrev], budget=float(budget.budget), year=year) + nren=nren_dict[abbrev], + budget=float(budget.budget), + year=year + ) session.merge(budget_entry) # Import the data from excel sheet to database diff --git a/test/conftest.py b/test/conftest.py index 60c02ab3..61f22e7e 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -73,7 +73,8 @@ def test_budget_data(): with db.session_scope() as session: data = [row for row in _test_data_csv("BudgetTestData.csv")] nren_names = set([row["nren"] for row in data]) - nren_dict = {nren_name: model.NREN(name=nren_name) for nren_name in nren_names} + nren_dict = { + nren_name: model.NREN(name=nren_name) for nren_name in nren_names} session.add_all(nren_dict.values()) for row in data: @@ -121,7 +122,8 @@ def test_funding_source_data(): with db.session_scope() as session: data = [row for row in _test_data_csv("FundingSourceTestData.csv")] nren_names = set([row["nren"] for row in data]) - nren_dict = {nren_name: model.NREN(name=nren_name) for nren_name in nren_names} + nren_dict = { + nren_name: model.NREN(name=nren_name) for nren_name in nren_names} session.add_all(nren_dict.values()) for row in data: @@ -164,7 +166,8 @@ def test_charging_structure_data(): with db.session_scope() as session: data = [row for row in _test_data_csv("ChargingStructureTestData.csv")] nren_names = set([row["nren"] for row in data]) - nren_dict = {nren_name: model.NREN(name=nren_name) for nren_name in nren_names} + nren_dict = { + nren_name: model.NREN(name=nren_name) for nren_name in nren_names} session.add_all(nren_dict.values()) for row in data: diff --git a/test/test_survey_publisher_2022.py b/test/test_survey_publisher_2022.py index c0da2159..6aca9104 100644 --- a/test/test_survey_publisher_2022.py +++ b/test/test_survey_publisher_2022.py @@ -37,8 +37,12 @@ def test_publisher(client, mocker, dummy_config): funding_source_data) with db.session_scope() as session: - nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'UoM/RicerkaNet', 'ASNET-AM'] - session.add_all([model.NREN(name=nren_name) for nren_name in nren_names]) + nren_names = [ + 'Nren1', 'Nren2', 'Nren3', 'Nren4', + 'SURF', 'KIFU', 'UoM', 'ASNET-AM', 'SIKT', 'LAT', 'RASH' + ] + session.add_all( + [model.NREN(name=nren_name) for nren_name in nren_names]) _cli(dummy_config) diff --git a/test/test_survey_publisher_v1.py b/test/test_survey_publisher_v1.py index c2c2f431..79c80931 100644 --- a/test/test_survey_publisher_v1.py +++ b/test/test_survey_publisher_v1.py @@ -14,8 +14,9 @@ def test_publisher(client, mocker, dummy_config): EXCEL_FILE) with db.session_scope() as session: - nren_names = ['SURF', 'KIFU', 'UoM/RicerkaNet', 'ASNET-AM'] - session.add_all([model.NREN(name=nren_name) for nren_name in nren_names]) + nren_names = ['SURF', 'KIFU', 'UoM', 'ASNET-AM', 'SIKT', 'LAT', 'RASH'] + session.add_all( + [model.NREN(name=nren_name) for nren_name in nren_names]) _cli(dummy_config) diff --git a/tox.ini b/tox.ini index 020e8315..d48ebeb4 100644 --- a/tox.ini +++ b/tox.ini @@ -3,7 +3,7 @@ envlist = py39 [flake8] exclude = venv,.tox,webapp -max-line-length = 120 +max-line-length = 80 [testenv] deps = -- GitLab From 32700ee593472910aedc98ec035e0d19f2aa9b43 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Thu, 13 Apr 2023 16:00:48 +0200 Subject: [PATCH 11/11] updated NREN list --- .../versions/2b698bb45c09_normalize_nrens.py | 111 +----------------- 1 file changed, 1 insertion(+), 110 deletions(-) diff --git a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py index e2be1554..d59fbeaf 100644 --- a/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py +++ b/compendium_v2/migrations/versions/2b698bb45c09_normalize_nrens.py @@ -27,165 +27,56 @@ def upgrade(): op.execute( "INSERT INTO nren (name) VALUES " - # "('AAF - Australian Access Federation')," - # "('AARNet')," "('ACOnet')," - # "('AfgREN')," "('AMRES')," - # "('Ankabut')," - # "('Arandu')," "('ARENA')," - # "('ARN')," "('ARNES')," - # "('ASGC')," "('ASNET-AM')," - # "('AzRena')," "('AzScienceNet')," - # "('BARNet')," "('BASNET')," - # "('BdREN')," "('Belnet')," "('BIHARNET')," - # "('BOLNET')," "('BREN')," - # "('Brunet')," - # "('CameroonianNREN')," - # "('CANARIE')," "('CARNET')," - # "('CEDIA')," - # "('CERNET')," "('CESNET')," - # "('CNRS')," - # "('CNTI')," - # "('CRNet')," - # "('CSTNet')," - # "('CUDI')," "('CYNET')," "('DeIC')," "('DFN')," - # "('e-ARENA')," - # "('eb@le')," "('EENet')," - # "('ErdemNet')," - # "('ERNET')," - # "('EtherNet')," - # "('EUN')," "('FCCN')," - # "('FREEnet')," "('Funet')," - # "('GabonREN')," - # "('GARNET')," "('GARR')," - # "('GCC')," "('GRENA')," "('GRNET S.A.')," - # "('HARNET')," "('HEAnet')," - # "('INNOVA|RED')," - # "('Internet2')," - # "('IRANET')," - # "('IRANET/IPM')," - # "('iRENALA')," - # "('ITB')," - # "('ITC')," "('IUCC')," "('Jisc')," - # "('JREN')," - # "('JUNet')," - # "('KAUST')," - # "('KAZRENA')," - # "('KENET')," "('KIFU')," - # "('KOREN')," - # "('KREN')," - # "('KRENA-AKNET')," - # "('KREONET')," + "('KREN')," "('LAT')," - # "('LEARN')," - # "('LERNET')," "('LITNET')," - # "('MaliREN')," - # "('MAREN')," "('MARnet')," - # "('MARWAN')," - # "('mmREN')," - # "('MoRENet')," "('MREN')," - # "('MYREN')," - # "('NCHC')," - # "('ngNER')," - # "('ngREN')," - # "('NiCT')," - # "('NigerREN')," - # "('NII')," - # "('NITC')," - # "('NREN')," - # "('OMREN')," - # "('PADI2')," - # "('PERN')," "('PIONIER')," - # "('PNGARNet')," - # "('PREGINET')," - # "('Qatar Foundation')," - # "('RAAP')," - # "('RADEI')," - # "('RAGIE')," - # "('RAICES')," "('RASH')," - # "('RAU')," - # "('REACCIUN')," - # "('REANNZ')," - # "('RedCyT')," "('RedIRIS')," - # "('RedUNIV')," "('RENAM')," - # "('RENATA')," "('RENATER')," - # "('RENER')," - # "('RENIA')," - # "('RENU')," - # "('RerBenin')," "('RESTENA')," - # "('REUNA')," "('RHnet')," - # "('RITER')," - # "('RNERT')," - # "('RNP')," - # "('RNRT')," - # "('RNU')," "('RoEduNet')," - # "('RUB')," - # "('RwEdNet')," "('SANET')," - # "('SANReN')," - # "('SAREN')," "('SARNET')," - # "('SHERN')," "('SigmaNet')," "('SIKT')," - # "('SingAREN')," - # "('snRER')," - # "('Somaliren')," - # "('SudREN')," "('SUNET')," - # "('SUREN')," "('SURF')," "('SWITCH')," - # "('TARENA')," - # "('TENET')," - # "('TERNET')," - # "('ThaiREN')," - # "('TTRENT')," - # "('TuRENA')," "('UARNet')," "('ULAKBIM')," - # "('UNITEC')," "('UNREN')," "('UoM')," "('URAN')" - # "('UzSciNet')," - # "('VinaREN')," - # "('ZAMREN')" ) op.add_column('budgets', sa.Column('nren_id', sa.Integer())) -- GitLab