Skip to content
Snippets Groups Projects
parse_excel_data.py 6.53 KiB
import logging
import openpyxl
import os

from compendium_v2.db.model import FeeType
from compendium_v2.environment import setup_logging

setup_logging()

logger = logging.getLogger(__name__)

EXCEL_FILE = os.path.join(os.path.dirname(__file__), "xlsx", "2021_Organisation_DataSeries.xlsx")


def fetch_budget_excel_data():
    # load the xlsx file
    sheet_name = "1. Budget"
    wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)

    # select the active worksheet
    ws = wb[sheet_name]
    # iterate over the rows in the worksheet
    for row in range(14, 57):
        for col in range(3, 8):
            # extract the data from the row
            nren = ws.cell(row=row, column=2).value
            budget = ws.cell(row=row, column=col).value
            year = ws.cell(row=13, column=col).value

            if budget is not None:
                budget = round(budget / 1000000, 2)
                if budget > 200:
                    logger.info(f'{nren} has budget set to >200M EUR for {year}. ({budget})')

                yield nren.upper(), budget, year


def fetch_funding_excel_data():
    # load the xlsx file
    wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)

    # select the active worksheet
    sheet_name = "2. Income Sources"
    ws = wb[sheet_name]

    def hard_number_convert(s, source_name, nren, year):
        if s is None:
            logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year})')
            return float(0)
        try:
            return float(s)
        except ValueError:
            logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year}) with value ({s})')
            return float(0)

    # iterate over the rows in the worksheet
    def create_points_for_year_until_2017(start_row, end_row, year, col_nren, col_start):
        for row in range(start_row, end_row):
            # extract the data from the row
            nren = ws.cell(row=row, column=col_nren).value
            client_institution = ws.cell(row=row, column=col_start).value
            commercial = ws.cell(row=row, column=col_start + 1).value
            geant_subsidy = ws.cell(row=row, column=col_start + 2).value
            gov_public_bodies = ws.cell(row=row, column=col_start + 3).value
            other_european_funding = ws.cell(row=row, column=col_start + 4).value
            other = ws.cell(row=row, column=col_start + 5).value

            client_institution = hard_number_convert(client_institution, "client institution", nren, year)
            commercial = hard_number_convert(commercial, "commercial", nren, year)
            geant_subsidy = hard_number_convert(geant_subsidy, "geant subsidy", nren, year)
            gov_public_bodies = hard_number_convert(gov_public_bodies, "gov/public_bodies", nren, year)
            other_european_funding = hard_number_convert(other_european_funding, "other european funding", nren, year)
            other = hard_number_convert(other, "other", nren, year)

            european_funding = geant_subsidy + other_european_funding

            # process the data (e.g. save to database)
            if nren is not None:
                yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)

    def create_points_for_year_from_2018(start_row, end_row, year, col_start):
        for row in range(start_row, end_row):
            # extract the data from the row
            nren = ws.cell(row=row, column=col_start).value
            client_institution = ws.cell(row=row, column=col_start + 3).value
            european_funding = ws.cell(row=row, column=col_start + 4).value
            gov_public_bodies = ws.cell(row=row, column=col_start + 5).value
            commercial = ws.cell(row=row, column=col_start + 6).value
            other = ws.cell(row=row, column=col_start + 7).value

            client_institution = hard_number_convert(client_institution, "client institution", nren, year)
            european_funding = hard_number_convert(european_funding, "european funding", nren, year)
            gov_public_bodies = hard_number_convert(gov_public_bodies, "gov/public_bodies", nren, year)
            commercial = hard_number_convert(commercial, "commercial", nren, year)
            other = hard_number_convert(other, "other", nren, year)

            # process the data (e.g. save to database)
            if nren is not None:
                yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)

    # For 2016
    yield from create_points_for_year_until_2017(8, 50, 2016, 43, 45)

    # For 2017
    yield from create_points_for_year_until_2017(8, 50, 2017, 32, 35)

    # For 2018
    yield from create_points_for_year_from_2018(8, 50, 2018, 21)

    # For 2019
    yield from create_points_for_year_from_2018(8, 50, 2019, 12)

    # For 2020
    yield from create_points_for_year_from_2018(8, 50, 2020, 3)


def fetch_charging_structure_excel_data():
    # load the xlsx file
    wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)

    # select the active worksheet
    sheet_name = "3. Charging mechanism"
    ws = wb[sheet_name]

    # iterate over the rows in the worksheet
    def create_points_for_year(start_row, end_row, year, col_start):
        for row in range(start_row, end_row):
            # extract the data from the row
            nren = ws.cell(row=row, column=col_start).value
            charging_structure = ws.cell(row=row, column=col_start + 1).value
            logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
            if charging_structure is not None:
                if "do not charge" in charging_structure:
                    charging_structure = FeeType.no_charge
                elif "combination" in charging_structure:
                    charging_structure = FeeType.combination
                elif "flat" in charging_structure:
                    charging_structure = FeeType.flat_fee
                elif "usage-based" in charging_structure:
                    charging_structure = FeeType.usage_based_fee
                elif "Other" in charging_structure:
                    charging_structure = FeeType.other
                else:
                    charging_structure = None

                logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')

                yield nren.upper(), year, charging_structure

    # For 2021
    yield from create_points_for_year(3, 45, 2021, 2)

    # For 2019
    yield from create_points_for_year(3, 45, 2019, 6)