-
Remco Tukker authoredRemco Tukker authored
parse_excel_data.py 6.53 KiB
import logging
import openpyxl
import os
from compendium_v2.db.model import FeeType
from compendium_v2.environment import setup_logging
setup_logging()
logger = logging.getLogger(__name__)
EXCEL_FILE = os.path.join(os.path.dirname(__file__), "xlsx", "2021_Organisation_DataSeries.xlsx")
def fetch_budget_excel_data():
# load the xlsx file
sheet_name = "1. Budget"
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)
# select the active worksheet
ws = wb[sheet_name]
# iterate over the rows in the worksheet
for row in range(14, 57):
for col in range(3, 8):
# extract the data from the row
nren = ws.cell(row=row, column=2).value
budget = ws.cell(row=row, column=col).value
year = ws.cell(row=13, column=col).value
if budget is not None:
budget = round(budget / 1000000, 2)
if budget > 200:
logger.info(f'{nren} has budget set to >200M EUR for {year}. ({budget})')
yield nren.upper(), budget, year
def fetch_funding_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "2. Income Sources"
ws = wb[sheet_name]
def hard_number_convert(s, source_name, nren, year):
if s is None:
logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year})')
return float(0)
try:
return float(s)
except ValueError:
logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year}) with value ({s})')
return float(0)
# iterate over the rows in the worksheet
def create_points_for_year_until_2017(start_row, end_row, year, col_nren, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=col_nren).value
client_institution = ws.cell(row=row, column=col_start).value
commercial = ws.cell(row=row, column=col_start + 1).value
geant_subsidy = ws.cell(row=row, column=col_start + 2).value
gov_public_bodies = ws.cell(row=row, column=col_start + 3).value
other_european_funding = ws.cell(row=row, column=col_start + 4).value
other = ws.cell(row=row, column=col_start + 5).value
client_institution = hard_number_convert(client_institution, "client institution", nren, year)
commercial = hard_number_convert(commercial, "commercial", nren, year)
geant_subsidy = hard_number_convert(geant_subsidy, "geant subsidy", nren, year)
gov_public_bodies = hard_number_convert(gov_public_bodies, "gov/public_bodies", nren, year)
other_european_funding = hard_number_convert(other_european_funding, "other european funding", nren, year)
other = hard_number_convert(other, "other", nren, year)
european_funding = geant_subsidy + other_european_funding
# process the data (e.g. save to database)
if nren is not None:
yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
def create_points_for_year_from_2018(start_row, end_row, year, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=col_start).value
client_institution = ws.cell(row=row, column=col_start + 3).value
european_funding = ws.cell(row=row, column=col_start + 4).value
gov_public_bodies = ws.cell(row=row, column=col_start + 5).value
commercial = ws.cell(row=row, column=col_start + 6).value
other = ws.cell(row=row, column=col_start + 7).value
client_institution = hard_number_convert(client_institution, "client institution", nren, year)
european_funding = hard_number_convert(european_funding, "european funding", nren, year)
gov_public_bodies = hard_number_convert(gov_public_bodies, "gov/public_bodies", nren, year)
commercial = hard_number_convert(commercial, "commercial", nren, year)
other = hard_number_convert(other, "other", nren, year)
# process the data (e.g. save to database)
if nren is not None:
yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
# For 2016
yield from create_points_for_year_until_2017(8, 50, 2016, 43, 45)
# For 2017
yield from create_points_for_year_until_2017(8, 50, 2017, 32, 35)
# For 2018
yield from create_points_for_year_from_2018(8, 50, 2018, 21)
# For 2019
yield from create_points_for_year_from_2018(8, 50, 2019, 12)
# For 2020
yield from create_points_for_year_from_2018(8, 50, 2020, 3)
def fetch_charging_structure_excel_data():
# load the xlsx file
wb = openpyxl.load_workbook(EXCEL_FILE, data_only=True, read_only=True)
# select the active worksheet
sheet_name = "3. Charging mechanism"
ws = wb[sheet_name]
# iterate over the rows in the worksheet
def create_points_for_year(start_row, end_row, year, col_start):
for row in range(start_row, end_row):
# extract the data from the row
nren = ws.cell(row=row, column=col_start).value
charging_structure = ws.cell(row=row, column=col_start + 1).value
logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
if charging_structure is not None:
if "do not charge" in charging_structure:
charging_structure = FeeType.no_charge
elif "combination" in charging_structure:
charging_structure = FeeType.combination
elif "flat" in charging_structure:
charging_structure = FeeType.flat_fee
elif "usage-based" in charging_structure:
charging_structure = FeeType.usage_based_fee
elif "Other" in charging_structure:
charging_structure = FeeType.other
else:
charging_structure = None
logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
yield nren.upper(), year, charging_structure
# For 2021
yield from create_points_for_year(3, 45, 2021, 2)
# For 2019
yield from create_points_for_year(3, 45, 2019, 6)