Skip to content
Snippets Groups Projects
Commit dd96c4d7 authored by Remco Tukker's avatar Remco Tukker
Browse files

normalization of nrens including datamigration; the publishers have not yet...

normalization of nrens including datamigration; the publishers have not yet been fixed and are broken for now
parent 6d2ab885
Branches
Tags
1 merge request: !1 normalization of nrens including datamigration
...@@ -3,26 +3,44 @@ import sqlalchemy as sa ...@@ -3,26 +3,44 @@ import sqlalchemy as sa
from typing import Any from typing import Any
from sqlalchemy import MetaData
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
# from sqlalchemy.orm import relationship
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Constraint naming templates handed to SQLAlchemy's MetaData so that every
# index/constraint gets a deterministic name instead of a database-chosen
# one.  Keys are SQLAlchemy's constraint-type codes: ix = index,
# uq = unique, ck = check, fk = foreign key, pk = primary key.
convention = {
    'ix': 'ix_%(column_0_label)s',
    'uq': 'uq_%(table_name)s_%(column_0_name)s',
    'ck': 'ck_%(table_name)s_%(constraint_name)s',
    'fk': 'fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s',
    'pk': 'pk_%(table_name)s',
}

# Shared MetaData instance carrying the naming convention above.
metadata_obj = MetaData(naming_convention=convention)
# https://github.com/python/mypy/issues/2477 # https://github.com/python/mypy/issues/2477
base_schema: Any = declarative_base() base_schema: Any = declarative_base(metadata=metadata_obj)
class NREN(base_schema):
    """ORM mapping for the ``nrens`` table: a numeric id plus a name."""

    __tablename__ = 'nrens'

    # Surrogate key; other tables point at it through their nren_id column.
    id = sa.Column(sa.BigInteger(), primary_key=True)
    # NREN name, mandatory, at most 128 characters.
    name = sa.Column(sa.String(length=128), nullable=False)
class BudgetEntry(base_schema): class BudgetEntry(base_schema):
__tablename__ = 'budgets' __tablename__ = 'budgets'
nren = sa.Column(sa.String(128), primary_key=True) nren_id = sa.Column(sa.BigInteger, sa.schema.ForeignKey(NREN.id), primary_key=True)
nren = relationship(NREN)
year = sa.Column(sa.Integer, primary_key=True) year = sa.Column(sa.Integer, primary_key=True)
budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False) budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
class FundingSource(base_schema): class FundingSource(base_schema):
__tablename__ = 'funding_source' __tablename__ = 'funding_source'
nren = sa.Column(sa.String(128), primary_key=True) nren_id = sa.Column(sa.BigInteger, sa.schema.ForeignKey(NREN.id), primary_key=True)
nren = relationship(NREN)
year = sa.Column(sa.Integer, primary_key=True) year = sa.Column(sa.Integer, primary_key=True)
client_institutions = sa.Column( client_institutions = sa.Column(
sa.Numeric(asdecimal=False), nullable=False) sa.Numeric(asdecimal=False), nullable=False)
... ...
......
"""normalize nrens
Revision ID: df2536b06f35
Revises: b70ada054046
Create Date: 2023-04-08 09:00:28.451307
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'df2536b06f35'
down_revision = 'b70ada054046'
branch_labels = None
depends_on = None
def upgrade():
    """Move NREN names out of the data tables into a new ``nrens`` table.

    Steps:
      1. create ``nrens`` (id, name);
      2. fill it with every distinct ``nren`` value found in ``budgets``
         and ``funding_source``;
      3. for each of those two tables: add ``nren_id``, populate it by
         matching on the name, make it NOT NULL, add the foreign key to
         ``nrens.id``, drop the old ``nren`` text column and create the
         new composite primary key ``(nren_id, year)``.
    """
    op.create_table(
        'nrens',
        sa.Column('id', sa.BigInteger(), nullable=False),
        sa.Column('name', sa.String(length=128), nullable=False),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_nrens'))
    )

    # The INSERT supplies no id, so it relies on the database generating
    # one for each row.  UNION removes names that occur in both tables.
    op.execute(
        "INSERT INTO nrens (name) "
        "SELECT DISTINCT nren FROM budgets UNION SELECT DISTINCT nren FROM funding_source;"
    )

    # Both data tables get exactly the same treatment, budgets first.
    for table in ('budgets', 'funding_source'):
        # Added as nullable first: existing rows have no value yet.
        op.add_column(table, sa.Column('nren_id', sa.BigInteger()))
        op.execute(f"UPDATE {table} SET nren_id = nrens.id FROM nrens WHERE {table}.nren = nrens.name")
        op.alter_column(table, 'nren_id', nullable=False)
        op.create_foreign_key(op.f(f'fk_{table}_nren_id_nrens'), table, 'nrens', ['nren_id'], ['id'])

        # The old primary key was (nren, year).  On PostgreSQL (which the
        # UPDATE ... FROM syntax above suggests is the target) dropping a
        # column also drops every constraint that involves it, so the old
        # primary key disappears here ...
        op.drop_column(table, 'nren')
        # ... and must be replaced explicitly, otherwise the table is left
        # without the (nren_id, year) primary key that the ORM model declares.
        op.create_primary_key(op.f(f'pk_{table}'), table, ['nren_id', 'year'])
def downgrade():
    """Reverse the NREN normalization: restore the ``nren`` name columns.

    For ``funding_source`` and then ``budgets``: re-add the ``nren`` text
    column, fill it from ``nrens.name`` via ``nren_id``, make it NOT NULL,
    drop the foreign key and the ``nren_id`` column, and recreate the
    composite primary key on ``(nren, year)``.  Finally drop ``nrens``.
    """
    # Reverse order of the upgrade: funding_source first, then budgets.
    for table in ('funding_source', 'budgets'):
        # Added as nullable first: existing rows have no value yet.
        op.add_column(table, sa.Column('nren', sa.VARCHAR(length=128), autoincrement=False))
        op.execute(f"UPDATE {table} SET nren = nrens.name FROM nrens WHERE {table}.nren_id = nrens.id")
        op.alter_column(table, 'nren', nullable=False)
        op.drop_constraint(op.f(f'fk_{table}_nren_id_nrens'), table, type_='foreignkey')

        # On PostgreSQL, dropping nren_id also drops any primary key that
        # includes it, so the key has to be put back on (nren, year).
        op.drop_column(table, 'nren_id')
        # NOTE(review): the name the primary key had before this revision is
        # not visible in this file; the project naming convention
        # (pk_<table>) is used here - confirm against revision b70ada054046.
        op.create_primary_key(op.f(f'pk_{table}'), table, ['nren', 'year'])

    # Nothing references nrens any more at this point.
    op.drop_table('nrens')
...@@ -2,6 +2,7 @@ import logging ...@@ -2,6 +2,7 @@ import logging
from typing import Any from typing import Any
from flask import Blueprint, jsonify, current_app from flask import Blueprint, jsonify, current_app
from sqlalchemy.orm import joinedload
from compendium_v2 import db, survey_db from compendium_v2 import db, survey_db
from compendium_v2.db import model from compendium_v2.db import model
...@@ -62,13 +63,13 @@ def budget_view() -> Any: ...@@ -62,13 +63,13 @@ def budget_view() -> Any:
def _extract_data(entry: model.BudgetEntry): def _extract_data(entry: model.BudgetEntry):
return { return {
'NREN': entry.nren, 'NREN': entry.nren.name,
'BUDGET': float(entry.budget), 'BUDGET': float(entry.budget),
'BUDGET_YEAR': entry.year, 'BUDGET_YEAR': entry.year,
} }
with db.session_scope() as session: with db.session_scope() as session:
entries = sorted([_extract_data(entry) entries = sorted([_extract_data(entry)
for entry in session.query(model.BudgetEntry)], for entry in session.query(model.BudgetEntry).options(joinedload(model.BudgetEntry.nren))],
key=lambda d: (d['BUDGET_YEAR'], d['NREN'])) key=lambda d: (d['BUDGET_YEAR'], d['NREN']))
return jsonify(entries) return jsonify(entries)
import logging import logging
from flask import Blueprint, jsonify, current_app from flask import Blueprint, jsonify, current_app
from sqlalchemy.orm import joinedload
from compendium_v2 import db from compendium_v2 import db
from compendium_v2.routes import common from compendium_v2.routes import common
from compendium_v2.db import model from compendium_v2.db import model
...@@ -60,7 +62,7 @@ def funding_source_view() -> Any: ...@@ -60,7 +62,7 @@ def funding_source_view() -> Any:
def _extract_data(entry: model.FundingSource): def _extract_data(entry: model.FundingSource):
return { return {
'NREN': entry.nren, 'NREN': entry.nren.name,
'YEAR': int(entry.year), 'YEAR': int(entry.year),
'CLIENT_INSTITUTIONS': float(entry.client_institutions), 'CLIENT_INSTITUTIONS': float(entry.client_institutions),
'EUROPEAN_FUNDING': float(entry.european_funding), 'EUROPEAN_FUNDING': float(entry.european_funding),
...@@ -71,6 +73,6 @@ def funding_source_view() -> Any: ...@@ -71,6 +73,6 @@ def funding_source_view() -> Any:
with db.session_scope() as session: with db.session_scope() as session:
entries = sorted([_extract_data(entry) entries = sorted([_extract_data(entry)
for entry in session.query(model.FundingSource)], for entry in session.query(model.FundingSource).options(joinedload(model.FundingSource.nren))],
key=lambda d: (d['NREN'], d['YEAR'])) key=lambda d: (d['NREN'], d['YEAR']))
return jsonify(entries) return jsonify(entries)
...@@ -71,9 +71,13 @@ def mocked_db(mocker): ...@@ -71,9 +71,13 @@ def mocked_db(mocker):
@pytest.fixture @pytest.fixture
def test_budget_data(): def test_budget_data():
with db.session_scope() as session: with db.session_scope() as session:
data = _test_data_csv("BudgetTestData.csv") data = [row for row in _test_data_csv("BudgetTestData.csv")]
nren_names = set([row["nren"] for row in data])
nren_dict = {nren_name: model.NREN(id=idx+1, name=nren_name) for idx, nren_name in enumerate(nren_names)}
session.add_all(nren_dict.values())
for row in data: for row in data:
nren = row["nren"] nren = nren_dict[row["nren"]]
budget = row["budget"] budget = row["budget"]
year = row["year"] year = row["year"]
...@@ -115,9 +119,13 @@ def test_budget_data(): ...@@ -115,9 +119,13 @@ def test_budget_data():
@pytest.fixture @pytest.fixture
def test_funding_source_data(): def test_funding_source_data():
with db.session_scope() as session: with db.session_scope() as session:
data = _test_data_csv("FundingSourceTestData.csv") data = [row for row in _test_data_csv("FundingSourceTestData.csv")]
nren_names = set([row["nren"] for row in data])
nren_dict = {nren_name: model.NREN(id=idx+1, name=nren_name) for idx, nren_name in enumerate(nren_names)}
session.add_all(nren_dict.values())
for row in data: for row in data:
nren = row["nren"] nren = nren_dict[row["nren"]]
year = row["year"] year = row["year"]
client = row["client"] client = row["client"]
european = row["european"] european = row["european"]
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment