conversion.py

"""
conversion
=========================

This module loads the survey data from 2022 from the survey database
and stores the data in the json structure of the new survey, so that
it can be used to prefill the 2023 survey.
It also loads an xlsx file with the data for the services questions.

"""
import logging
import click
import json
import os

import openpyxl
from sqlalchemy import delete, text, select

import compendium_v2
from compendium_v2.environment import setup_logging
from compendium_v2.db import db
from compendium_v2.config import load
from compendium_v2.survey_db import model as survey_model
from compendium_v2.db.model import NREN
from compendium_v2.db.survey_model import Survey, SurveyResponse, ResponseStatus

from compendium_v2.conversion import mapping

setup_logging()

logger = logging.getLogger('conversion')

resources_dir = f"{os.path.abspath(os.path.join( os.path.dirname(__file__), os.pardir))}/resources"

EXCEL_NREN_SERVICES_2023 = os.path.join(resources_dir, "NREN-Services-prefills_2023_Recovered.xlsx")


def query_nren(nren_id: int):
    query = mapping.ANSWERS_2022_QUERY.format(nren_id)
    answers = {}
    for row in db.session.execute(text(query), bind_arguments={'bind': db.engines[survey_model.SURVEY_DB_BIND]}):
        answers[row[0]] = row[1]
    return answers


def convert_answers(answers):
    data = {}

    for id, question_name in mapping.ID_TO_NAME.items():
        if id not in answers:
            continue

        answer = answers[id]
        if len(answer) > 1 and answer[0] == '"' and answer[-1] == '"':
            answer = answer[1:-1]
            if id in mapping.VALUE_TO_CODE_MAPPING:
                answer = mapping.VALUE_TO_CODE_MAPPING[id][answer]
        if len(answer) > 1 and answer[0] == '[' and answer[-1] == ']':
            answer = json.loads(answer)
            if id in mapping.VALUE_TO_CODE_MAPPING:
                mapped_answer = []
                for entry in answer:
                    mapped_answer.append(mapping.VALUE_TO_CODE_MAPPING[id][entry])
                answer = mapped_answer

        if id == 16504:
            answer = '\n'.join(answer)

        # code to convert my description in the mapping to a json structure
        question_names = question_name.split(":")
        subdict = data
        for name in question_names[0:-1]:
            if name[-1] == "]":
                index = name[-2]
                if index == "[":
                    subdict = subdict.setdefault(name[:-2], [])
                    break  # special case where json list is mapped to a list of dicts (part 1)
                sublist = subdict.setdefault(name[:-3], [])
                index = int(index)
                while len(sublist) <= index:
                    sublist.append({})
                subdict = sublist[index]
            else:
                subdict = subdict.setdefault(name, {})

        if isinstance(subdict, list):  # special case where json list is mapped to a list of dicts (part 2)
            for answer_entry in answer:
                subdict.append({question_names[-1]: answer_entry})
        elif question_names[-1] == "available":  # special case because we changed the policies questions a bit
            if answer == "Yes":
                subdict[question_names[-1]] = ["yes"]
        else:
            subdict[question_names[-1]] = answer

    for id, question_name in mapping.ID_TO_NAME_SERVICES.items():
        if id not in answers:
            continue

        answer = answers[id]
        answer = json.loads(answer)
        for user_type in answer:
            user_type_code = mapping.SERVICE_USER_TYPE_TO_CODE[user_type]
            formatted_question_name = question_name.format(user_type_code)

            question_names = formatted_question_name.split(":")
            subdict = data
            for name in question_names[0:-2]:
                subdict = subdict.setdefault(name, {})

            sublist = subdict.setdefault(question_names[-2], [])
            sublist.append(question_names[-1])

    return {"data": data}


def load_service_data():
    wb = openpyxl.load_workbook(EXCEL_NREN_SERVICES_2023, data_only=True, read_only=True)
    ws = wb["Sheet1"]
    rows = list(ws.rows)

    titles = rows[0]

    nren_service_data_columns = {}
    for i in range(0, 131):
        if titles[i].value:
            name = titles[i].value.split(' ')[0].upper()
            name = {'KIFÜ': 'KIFU', 'AZSCIENCENET': 'ANAS', 'PSNC': 'PIONIER'}.get(name, name)
            nren_service_data_columns[name] = i

    nren_service_data = {}

    for nren_name, start_column in nren_service_data_columns.items():
        nren_service_data[nren_name] = {}
        for row_index in range(2, 61):
            row = rows[row_index]
            service_name = row[0].value
            if row[start_column].value and row[start_column].value.upper() == 'YES':
                question_data = {"offered": ["yes"]}
                if row[start_column + 1].value:
                    question_data["name"] = row[start_column + 1].value
                if row[start_column + 2].value:
                    question_data["additional_information"] = row[start_column + 2].value

                question_key_base = mapping.SERVICES_MAPPING[service_name]
                question_name, subquestion_name = question_key_base.split(':')
                question_dict = nren_service_data[nren_name].setdefault(question_name, {})
                question_dict[subquestion_name] = question_data

    return nren_service_data


def _cli(app):

    nren_service_data = load_service_data()

    with app.app_context():

        nren_surveys = {}

        for nren in db.session.scalars(select(NREN)):
            survey_db_nren_id = mapping.NREN_IDS[nren.name]
            nren_surveys[nren] = query_nren(survey_db_nren_id)

        for nren_name in nren_service_data.keys():
            if nren_name not in [n.name.upper() for n in nren_surveys.keys()]:
                raise Exception('NREN in excel not found in source dataset!')

        db.session.execute(delete(SurveyResponse).where(
            SurveyResponse.survey_year == 2022
        ))

        survey = db.session.scalar(select(Survey).where(Survey.year == 2022))

        for nren, answers in nren_surveys.items():
            survey_dict = convert_answers(answers)
            survey_dict["data"].update(nren_service_data.get(nren.name.upper(), {}))
            survey_dict["page"] = 0
            survey_dict["verification_status"] = {}
            response = SurveyResponse(
                nren=nren,
                nren_id=nren.id,
                survey_year=2022,
                survey=survey,
                answers=survey_dict,
                status=ResponseStatus.completed
            )
            db.session.add(response)

        db.session.commit()


@click.command()
@click.option('--config', type=click.STRING, default='config.json')
def cli(config):
    app_config = load(open(config, 'r'))

    app_config['SQLALCHEMY_BINDS'] = {survey_model.SURVEY_DB_BIND: app_config['SURVEY_DATABASE_URI']}

    app = compendium_v2._create_app_with_db(app_config)
    _cli(app)


if __name__ == "__main__":
    cli()