Add the Policy model to the data model and load the policy data from the survey database

8cd09275 · Remco Tukker · 03ae9f20 · 8cd09275 · 8cd09275 · 8cd09275
Commit 8cd09275 authored May 10, 2023 by Remco Tukker
--- a/compendium_v2/db/model.py
+++ b/compendium_v2/db/model.py
@@ -101,3 +101,17 @@ class ECProject(db.Model):
    nren: Mapped[NREN] = relationship(lazy='joined')
    year: Mapped[int_pk]
    project: Mapped[str256_pk]
+
+
+class Policy(db.Model):  # one row per NREN and year; the columns hold that year's policy answers
+    __tablename__ = 'policy'
+    nren_id: Mapped[int_pk_fkNREN]  # primary key part; the migration makes it a foreign key to nren.id
+    nren: Mapped[NREN] = relationship(lazy='joined')  # joined eager load of the owning NREN
+    year: Mapped[int_pk]  # primary key part; the publisher fills it with the survey year
+    strategic_plan: Mapped[str]  # NOT NULL text, as are all columns below; the publisher stores '' when unanswered
+    environmental: Mapped[str]
+    equal_opportunity: Mapped[str]
+    connectivity: Mapped[str]
+    acceptable_use: Mapped[str]
+    privacy_notice: Mapped[str]
+    data_protection: Mapped[str]
--- a/compendium_v2/migrations/versions/73e493fe7415_add_policy_table.py
+++ b/compendium_v2/migrations/versions/73e493fe7415_add_policy_table.py
+"""Add policy table
+
+Revision ID: 73e493fe7415
+Revises: 35a343afaf83
+Create Date: 2023-05-09 16:01:43.504993
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = '73e493fe7415'
+down_revision = '35a343afaf83'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """
+    Create the policy table: one row per (nren_id, year) with one required
+    text column per policy type, then rename the fee_type enum by hand.
+    """
+    # every policy answer is stored as a plain, non-nullable string
+    policy_columns = [
+        sa.Column(column_name, sa.String(), nullable=False)
+        for column_name in (
+            'strategic_plan',
+            'environmental',
+            'equal_opportunity',
+            'connectivity',
+            'acceptable_use',
+            'privacy_notice',
+            'data_protection',
+        )
+    ]
+    foreign_key = sa.ForeignKeyConstraint(['nren_id'], ['nren.id'], name=op.f('fk_policy_nren_id_nren'))
+    primary_key = sa.PrimaryKeyConstraint('nren_id', 'year', name=op.f('pk_policy'))
+
+    op.create_table(
+        'policy',
+        sa.Column('nren_id', sa.Integer(), nullable=False),
+        sa.Column('year', sa.Integer(), nullable=False),
+        *policy_columns,
+        foreign_key,
+        primary_key,
+    )
+
+    # Alembic doesn't handle postgres enums properly (https://github.com/sqlalchemy/alembic/issues/278),
+    # so the enum type is renamed by hand.
+    op.execute('ALTER TYPE fee_type RENAME TO feetype;')
+
+
+def downgrade():  # reverses upgrade(): restores the enum type name and removes the policy table
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.execute('ALTER TYPE feetype RENAME TO fee_type;')  # undo the manual enum rename
+
+    op.drop_table('policy')
+    # ### end Alembic commands ###
--- a/compendium_v2/publishers/survey_publisher_2022.py
+++ b/compendium_v2/publishers/survey_publisher_2022.py
@@ -13,7 +13,7 @@ import math
 import json
 import html

-from sqlalchemy import text, delete
+from sqlalchemy import delete, text
 from collections import defaultdict

 import compendium_v2
@@ -54,7 +54,7 @@ JOIN sections s ON q.section_id = s.id
 JOIN compendia c ON s.compendium_id = c.id
 WHERE
    a.question_id = {}
-    AND c.year = 2022
+    AND c.year = {}
    AND a.value NOT IN ('""', '"NA"', '"N/A"', '[]', '[""]', '["-"]', '["/"]')
 ORDER BY n.id, a.question_id, a.updated_at DESC
 """
@@ -122,12 +122,16 @@ def query_budget():

 def query_funding_sources():  # yields a (source, query result) pair for every FundingSource
    for source in FundingSource:
-        query = QUESTION_TEMPLATE_QUERY.format(source.value)
+        query = QUESTION_TEMPLATE_QUERY.format(source.value, 2022)  # the template's year slot is pinned to 2022 here
        yield source, db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]})


 def query_question(question: enum.Enum):
-    query = QUESTION_TEMPLATE_QUERY.format(question.value)
+    return query_question_id(question.value)  # the enum value is used as question id; year defaults to 2022
+
+
+def query_question_id(question_id: int, year: int = 2022):  # runs the answers query for one question and year
+    query = QUESTION_TEMPLATE_QUERY.format(question_id, year)  # formatted straight into the SQL: trusted ints only
    return db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]})


@@ -219,8 +223,7 @@ def transfer_staff_data(nren_dict):
                continue

            # initialize on first use, so we don't add data for nrens with no answers
-            data.setdefault(nren_name, {question: 0 for question in StaffQuestion})[
-                question] = value
+            data.setdefault(nren_name, {question: 0 for question in StaffQuestion})[question] = value

    for nren_name, nren_info in data.items():
        if sum([nren_info[question] for question in StaffQuestion]) == 0:
@@ -370,7 +373,7 @@ def transfer_ec_projects(nren_dict):
        nren_name = row[0].upper()

        if nren_name not in nren_dict:
-            logger.info(f'{nren_name} unknown. Skipping.')
+            logger.warning(f'{nren_name} unknown. Skipping.')
            continue

        try:
@@ -400,6 +403,65 @@ def transfer_ec_projects(nren_dict):
    db.session.commit()


+def transfer_policies(nren_dict):
+    """
+    Copy the policy answers of the 2018-2022 surveys into the policy table.
+
+    Answers are strings that should be urls, but sometimes there's other stuff
+    like email addresses or random text. Only the first whitespace-separated
+    token of an answer is kept, and the answer is skipped unless that token
+    contains a '.' or an '@'. Policies without a usable answer end up as ''.
+
+    :param nren_dict: NREN objects keyed by their upper-cased name; answers of
+        NRENs that are not in this dict are skipped
+    """
+    # survey question id per policy (keyed by Policy column name) and survey year;
+    # years without an entry are simply not queried for that policy
+    policy_questions = {
+        'strategic_plan':    {2022: 16469, 2021: 16064, 2020: 15720, 2019: 15305, 2018: 14910},
+        'environmental':     {2022: 16471, 2021: 16066, 2020: 15722, 2019: 15307, 2018: 14912},
+        'equal_opportunity': {2022: 16473, 2021: 16378},
+        'connectivity':      {2022: 16475, 2021: 16068, 2020: 15724, 2019: 15309, 2018: 14914},
+        'acceptable_use':    {2022: 16477, 2021: 16070, 2020: 15726, 2019: 15311, 2018: 14916},
+        'privacy_notice':    {2022: 16479, 2021: 16072, 2020: 15728, 2019: 15575},
+        'data_protection':   {2022: 16481, 2021: 16074, 2020: 15730, 2019: 15577}
+    }
+    # derived from the table above so the two cannot drift apart
+    survey_years = sorted({year for ids_per_year in policy_questions.values() for year in ids_per_year})
+
+    data = {}
+    for year in survey_years:
+        for column_name, ids_per_year in policy_questions.items():
+            if year not in ids_per_year:
+                continue
+
+            for row in query_question_id(ids_per_year[year], year):
+                nren_name = row[0].upper()
+                raw_value = row[1]
+
+                if nren_name not in nren_dict:
+                    logger.warning(f'{nren_name} unknown. Skipping.')
+                    continue
+
+                # keep the first token only; a blank answer has no tokens at all and is
+                # rejected by the check below instead of raising an IndexError
+                tokens = raw_value.split()
+                value = tokens[0].strip('"') if tokens else ''
+
+                if value.upper() == 'N.A.' or ('.' not in value and '@' not in value):
+                    # this test is a bit silly but does seem to filter out all the nonsense responses
+                    logger.warning(f'"{value}" does not look like an email address or link. Skipping.')
+                    continue
+
+                if raw_value not in [f'"{value}"', value]:
+                    logger.info(f'Cleaned policy answer: "{raw_value}" became "{value}"')
+
+                # initialize on first use, so we don't add data for nrens with no answers
+                policy_answers = data.setdefault((nren_name, year), dict.fromkeys(policy_questions, ''))
+                policy_answers[column_name] = value
+
+    for (nren_name, year), policy_answers in data.items():
+        nren = nren_dict[nren_name]
+        # the answer keys are the Policy column names, so they can be passed on as-is
+        policy_data = model.Policy(nren=nren, nren_id=nren.id, year=year, **policy_answers)
+        db.session.merge(policy_data)
+    db.session.commit()
+
+
 def _cli(config, app):
    with app.app_context():
        nren_dict = helpers.get_uppercase_nren_dict()
@@ -410,6 +472,7 @@ def _cli(config, app):
        transfer_nren_sub_org(nren_dict)
        transfer_charging_structure(nren_dict)
        transfer_ec_projects(nren_dict)
+        transfer_policies(nren_dict)


 @click.command()

--- a/test/test_survey_publisher_2022.py
+++ b/test/test_survey_publisher_2022.py
@@ -183,9 +183,21 @@ def test_publisher(app_with_survey_db, mocker, dummy_config):
                ('nren3', '["project3"]'),
            ]

+    def question_id_data(question_id, year=2022):
+        """
+        Stand-in for query_question_id: returns the same three (nren, answer) rows
+        for every known policy question id.
+
+        The rows hold a link, an email address and an 'n.a.' answer. The year
+        default mirrors the signature of the function this replaces.
+        """
+        policy_question_ids = {
+            16469, 16064, 15720, 15305, 14910, 16471, 16066, 15722, 15307, 14912, 16473, 16378,
+            16475, 16068, 15724, 15309, 14914, 16477, 16070, 15726, 15311, 14916, 16479, 16072, 15728, 15575,
+            16481, 16074, 15730, 15577,
+        }
+        if question_id not in policy_question_ids:
+            # fail with a clear message instead of handing None to the caller's row loop
+            raise ValueError(f'unexpected policy question id: {question_id}')
+        return [
+            ('nren1', f'www.nren.com/somepolicy{year}.pdf'),
+            ('nren2', 'policyemail@nren.com'),
+            ('nren3', 'n.a. online'),
+        ]
+
    mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_budget', get_rows_as_tuples)
    mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_funding_sources', funding_source_data)
    mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_question', question_data)
+    mocker.patch('compendium_v2.publishers.survey_publisher_2022.query_question_id', question_id_data)

    nren_names = ['Nren1', 'Nren2', 'Nren3', 'Nren4', 'SURF', 'KIFU', 'UoM', 'ASNET-AM', 'SIKT', 'LAT', 'RASH']
    with app_with_survey_db.app_context():
@@ -279,3 +291,13 @@ def test_publisher(app_with_survey_db, mocker, dummy_config):

        assert _ec_data[2].nren.name.lower() == 'nren3'
        assert _ec_data[2].project == 'project3'
+
+        policy_data = db.session.scalars(
+            select(model.Policy).order_by(model.Policy.nren_id.asc())
+        ).all()
+        policy_data_2020 = [p for p in policy_data if p.year == 2020]
+        policy_data_2022 = [p for p in policy_data if p.year == 2022]
+        assert len(policy_data_2020) == 2
+        assert len(policy_data_2022) == 2
+        assert policy_data_2020[0].strategic_plan == 'www.nren.com/somepolicy2020.pdf'
+        assert policy_data_2020[1].strategic_plan == 'policyemail@nren.com'