Skip to content
Snippets Groups Projects
Commit 8cd09275 authored by Remco Tukker
Browse files

added policy to the datamodel and load the policy data from the database

parent 03ae9f20
No related branches found
No related tags found
1 merge request: !20 "added policy to the datamodel and load the policy data from the database"
......@@ -101,3 +101,17 @@ class ECProject(db.Model):
nren: Mapped[NREN] = relationship(lazy='joined')
year: Mapped[int_pk]
project: Mapped[str256_pk]
class Policy(db.Model):
    """Policy answers for one NREN and one year, stored in the ``policy`` table.

    A row is identified by the combination of ``nren_id`` and ``year``; every
    other column holds one policy answer as a string.
    """
    __tablename__ = 'policy'
    # first half of the primary key, referencing the NREN this row belongs to
    nren_id: Mapped[int_pk_fkNREN]
    # lazy='joined': the related NREN is loaded together with the policy row
    nren: Mapped[NREN] = relationship(lazy='joined')
    # second half of the primary key
    year: Mapped[int_pk]
    # one string column per policy type
    strategic_plan: Mapped[str]
    environmental: Mapped[str]
    equal_opportunity: Mapped[str]
    connectivity: Mapped[str]
    acceptable_use: Mapped[str]
    privacy_notice: Mapped[str]
    data_protection: Mapped[str]
"""Add policy table
Revision ID: 73e493fe7415
Revises: 35a343afaf83
Create Date: 2023-05-09 16:01:43.504993
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `revision` is the id of this migration (matches "Revision ID" in the module docstring).
revision = '73e493fe7415'
# `down_revision` is the migration this one revises (matches "Revises" in the module docstring).
down_revision = '35a343afaf83'
# No branch labels or cross-branch dependencies are declared for this migration.
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``policy`` table and rename the ``fee_type`` enum type to ``feetype``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # All policy answers are stored as non-nullable strings, in this column order.
    policy_fields = (
        'strategic_plan',
        'environmental',
        'equal_opportunity',
        'connectivity',
        'acceptable_use',
        'privacy_notice',
        'data_protection',
    )
    key_columns = [
        sa.Column('nren_id', sa.Integer(), nullable=False),
        sa.Column('year', sa.Integer(), nullable=False),
    ]
    answer_columns = [sa.Column(field, sa.String(), nullable=False) for field in policy_fields]
    constraints = [
        sa.ForeignKeyConstraint(['nren_id'], ['nren.id'], name=op.f('fk_policy_nren_id_nren')),
        sa.PrimaryKeyConstraint('nren_id', 'year', name=op.f('pk_policy')),
    ]
    op.create_table('policy', *key_columns, *answer_columns, *constraints)

    # Alembic doesn't handle Postgres enums properly: https://github.com/sqlalchemy/alembic/issues/278
    # so the enum type is renamed by hand here.
    op.execute('ALTER TYPE fee_type RENAME TO feetype;')
    # ### end Alembic commands ###
def downgrade():
    """Undo upgrade(): restore the ``fee_type`` enum name, then drop the ``policy`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    restore_enum_name = 'ALTER TYPE feetype RENAME TO fee_type;'
    op.execute(restore_enum_name)
    op.drop_table('policy')
    # ### end Alembic commands ###
......@@ -13,7 +13,7 @@ import math
import json
import html
from sqlalchemy import text, delete
from sqlalchemy import delete, text
from collections import defaultdict
import compendium_v2
......@@ -54,7 +54,7 @@ JOIN sections s ON q.section_id = s.id
JOIN compendia c ON s.compendium_id = c.id
WHERE
a.question_id = {}
AND c.year = 2022
AND c.year = {}
AND a.value NOT IN ('""', '"NA"', '"N/A"', '[]', '[""]', '["-"]', '["/"]')
ORDER BY n.id, a.question_id, a.updated_at DESC
"""
......@@ -122,12 +122,16 @@ def query_budget():
def query_funding_sources():
    """
    Yield ``(source, result)`` for every member of ``FundingSource``.

    Each result is obtained by executing ``QUESTION_TEMPLATE_QUERY``, filled in
    with the source's value as question id and 2022 as year, on the survey
    database bind.
    """
    for source in FundingSource:
        # The template has two placeholders (question id, then year); formatting it
        # with the question id alone raises IndexError, so both are always supplied.
        query = QUESTION_TEMPLATE_QUERY.format(source.value, 2022)
        yield source, db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]})
def query_question(question: enum.Enum):
    """
    Return the survey answers for the question identified by an enum member.

    The member's value is used as the question id; building and executing the
    query (including the default year) is left entirely to query_question_id.

    :param question: enum member whose ``value`` is the question id
    :return: whatever query_question_id returns for that id
    """
    return query_question_id(question.value)
def query_question_id(question_id: int, year: int = 2022):
    """
    Execute the question template query for one question id and year.

    :param question_id: id filled into the template's first placeholder
    :param year: year filled into the template's second placeholder
    :return: the result of executing the query on the survey database bind
    """
    statement = text(QUESTION_TEMPLATE_QUERY.format(question_id, year))
    survey_engine = db.engines[survey_model.SURVEY_DB_BIND]
    return db.session.execute(statement, bind_arguments={'bind': survey_engine})
......@@ -219,8 +223,7 @@ def transfer_staff_data(nren_dict):
continue
# initialize on first use, so we don't add data for nrens with no answers
data.setdefault(nren_name, {question: 0 for question in StaffQuestion})[
question] = value
data.setdefault(nren_name, {question: 0 for question in StaffQuestion})[question] = value
for nren_name, nren_info in data.items():
if sum([nren_info[question] for question in StaffQuestion]) == 0:
......@@ -370,7 +373,7 @@ def transfer_ec_projects(nren_dict):
nren_name = row[0].upper()
if nren_name not in nren_dict:
logger.info(f'{nren_name} unknown. Skipping.')
logger.warning(f'{nren_name} unknown. Skipping.')
continue
try:
......@@ -400,6 +403,65 @@ def transfer_ec_projects(nren_dict):
db.session.commit()
def transfer_policies(nren_dict):
    """
    Copy the policy answers from the survey database into the policy table.

    Answers are strings that should be urls, but sometimes there's other stuff
    like email addresses or random text. Only the first whitespace-separated
    token of an answer is kept (with surrounding double quotes removed), and
    only when it contains a '.' or an '@' and is not 'N.A.'. Anything else,
    including an empty answer, is logged and skipped.

    One Policy row is merged per (NREN, year) that has at least one usable
    answer; policies without a usable answer are stored as empty strings.

    :param nren_dict: maps upper-cased NREN names to NREN objects; answers from
        NRENs that are not in this dict are logged and skipped
    """
    # question id per policy type and survey year; not every policy was asked every year
    policy_questions = {
        'strategy': {2022: 16469, 2021: 16064, 2020: 15720, 2019: 15305, 2018: 14910},
        'environment': {2022: 16471, 2021: 16066, 2020: 15722, 2019: 15307, 2018: 14912},
        'equality': {2022: 16473, 2021: 16378},
        'connectivity': {2022: 16475, 2021: 16068, 2020: 15724, 2019: 15309, 2018: 14914},
        'acceptable_use': {2022: 16477, 2021: 16070, 2020: 15726, 2019: 15311, 2018: 14916},
        'privacy': {2022: 16479, 2021: 16072, 2020: 15728, 2019: 15575},
        'data_protection': {2022: 16481, 2021: 16074, 2020: 15730, 2019: 15577}
    }
    # derive the years from the mapping so the two cannot drift apart
    years = sorted({year for ids_by_year in policy_questions.values() for year in ids_by_year})

    data = {}
    for year in years:
        policy_questions_year = {key: ids[year] for key, ids in policy_questions.items() if year in ids}
        for question_key, question_id in policy_questions_year.items():
            rows = query_question_id(question_id, year)
            for row in rows:
                nren_name = row[0].upper()
                _value = row[1]

                if nren_name not in nren_dict:
                    logger.warning(f'{nren_name} unknown. Skipping.')
                    continue

                # an empty or whitespace-only answer has no first token; treat it as an
                # empty value so it is rejected below instead of raising IndexError
                tokens = _value.split()
                value = tokens[0].strip('"') if tokens else ''

                if value.upper() == 'N.A.' or ('.' not in value and '@' not in value):
                    # this test is a bit silly but does seem to filter out all the nonsense responses
                    logger.warning(f'"{value}" does not look like an email address or link. Skipping.')
                    continue

                if _value not in [f'"{value}"', value]:
                    logger.info(f'Cleaned policy answer: "{_value}" became "{value}"')

                # initialize on first use, so we don't add data for nrens with no answers
                data.setdefault((nren_name, year), {q: '' for q in policy_questions})
                data[(nren_name, year)][question_key] = value

    for (nren_name, year), nren_info in data.items():
        policy_data = model.Policy(
            nren=nren_dict[nren_name],
            nren_id=nren_dict[nren_name].id,
            year=year,
            strategic_plan=nren_info['strategy'],
            environmental=nren_info['environment'],
            equal_opportunity=nren_info['equality'],
            connectivity=nren_info['connectivity'],
            acceptable_use=nren_info['acceptable_use'],
            privacy_notice=nren_info['privacy'],
            data_protection=nren_info['data_protection'],
        )
        db.session.merge(policy_data)
    db.session.commit()
def _cli(config, app):
with app.app_context():
nren_dict = helpers.get_uppercase_nren_dict()
......@@ -410,6 +472,7 @@ def _cli(config, app):
transfer_nren_sub_org(nren_dict)
transfer_charging_structure(nren_dict)
transfer_ec_projects(nren_dict)
transfer_policies(nren_dict)
@click.command()
......
......@@ -183,9 +183,21 @@ def test_publisher(app_with_survey_db, mocker, dummy_config):
('nren3', '["project3"]'),
]
def question_id_data(question_id, year):
    """
    Stand-in for query_question_id that returns canned policy answers.

    For every known policy question id the same three rows are returned: a
    link containing the year, an email address, and an answer starting with
    'n.a.'. Any other question id yields an empty list, like a query without
    results, instead of an implicit None that cannot be iterated.

    :param question_id: question id the publisher asks for
    :param year: year that is embedded in the link of the first row
    :return: list of (nren name, answer) tuples
    """
    policy_question_ids = {
        16469, 16064, 15720, 15305, 14910, 16471, 16066, 15722, 15307, 14912, 16473, 16378,
        16475, 16068, 15724, 15309, 14914, 16477, 16070, 15726, 15311, 14916, 16479, 16072, 15728, 15575,
        16481, 16074, 15730, 15577,
    }
    if question_id not in policy_question_ids:
        return []
    return [
        ('nren1', f'www.nren.com/somepolicy{year}.pdf'),
        ('nren2', 'policyemail@nren.com'),
        ('nren3', 'n.a. online'),
    ]
mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_budget', get_rows_as_tuples)
mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_funding_sources', funding_source_data)
mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_question', question_data)
mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_question_id', question_id_data)
nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'UoM', 'ASNET-AM', 'SIKT', 'LAT', 'RASH']
with app_with_survey_db.app_context():
......@@ -279,3 +291,13 @@ def test_publisher(app_with_survey_db, mocker, dummy_config):
assert _ec_data[2].nren.name.lower() == 'nren3'
assert _ec_data[2].project == 'project3'
policy_data = db.session.scalars(
select(model.Policy).order_by(model.Policy.nren_id.asc())
).all()
policy_data_2020 = [p for p in policy_data if p.year == 2020]
policy_data_2022 = [p for p in policy_data if p.year == 2022]
assert len(policy_data_2020) == 2
assert len(policy_data_2022) == 2
assert policy_data_2020[0].strategic_plan == 'www.nren.com/somepolicy2020.pdf'
assert policy_data_2020[1].strategic_plan == 'policyemail@nren.com'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment