From ec8191540280de3822df15982b990414b2486259 Mon Sep 17 00:00:00 2001 From: Remco Tukker <remco.tukker@geant.org> Date: Thu, 27 Apr 2023 21:01:51 +0200 Subject: [PATCH] first batch of fixed python code quality issues --- .../background_task/parse_excel_data.py | 51 ++++-------- compendium_v2/config.py | 25 +----- compendium_v2/db/model.py | 81 +++++++++---------- compendium_v2/environment.py | 3 +- compendium_v2/migrations/env.py | 4 +- .../publishers/survey_publisher_2022.py | 10 +-- compendium_v2/routes/charging.py | 2 +- compendium_v2/survey_db/model.py | 21 +++-- requirements.txt | 4 +- test/test_survey_publisher_2022.py | 6 +- 10 files changed, 77 insertions(+), 130 deletions(-) diff --git a/compendium_v2/background_task/parse_excel_data.py b/compendium_v2/background_task/parse_excel_data.py index d599f477..ef8e2cd6 100644 --- a/compendium_v2/background_task/parse_excel_data.py +++ b/compendium_v2/background_task/parse_excel_data.py @@ -1,6 +1,6 @@ +import logging import openpyxl import os -import logging from compendium_v2.db.model import FeeType from compendium_v2.environment import setup_logging @@ -9,9 +9,7 @@ setup_logging() logger = logging.getLogger(__name__) -EXCEL_FILE = os.path.join( - os.path.dirname(__file__), "xlsx", - "2021_Organisation_DataSeries.xlsx") +EXCEL_FILE = os.path.join(os.path.dirname(__file__), "xlsx", "2021_Organisation_DataSeries.xlsx") def fetch_budget_excel_data(): @@ -32,12 +30,7 @@ def fetch_budget_excel_data(): if budget is not None: budget = round(budget / 1000000, 2) if budget > 200: - logger.info( - f'{nren} has budget set to ' - f'>200M EUR for {year}. ({budget})') - - # process the data (e.g. save to database) - # print(f"NREN: {nren}, Budget: {budget}, Year: {year}") + logger.info(f'{nren} has budget set to >200M EUR for {year}. ({budget})') yield nren.upper(), budget, year @@ -52,17 +45,12 @@ def fetch_funding_excel_data(): def hard_number_convert(s, source_name, nren, year): if s is None: - logger.info( - f'Invalid Value :{nren} has empty value for {source_name}.' - + f'for year ({year})') + logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year})') return float(0) - """ Returns True if string is a number. """ try: return float(s) except ValueError: - logger.info( - f'Invalid Value :{nren} has empty value for {source_name}.' - + f'for year ({year}) with value ({s})') + logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year}) with value ({s})') return float(0) # iterate over the rows in the worksheet @@ -76,7 +64,6 @@ def fetch_funding_excel_data(): gov_public_bodies = ws.cell(row=row, column=col_start + 3).value other_european_funding = ws.cell(row=row, column=col_start + 4).value other = ws.cell(row=row, column=col_start + 5).value - print(nren, client_institution, commercial, geant_subsidy, gov_public_bodies, other_european_funding, other) client_institution = hard_number_convert(client_institution, "client institution", nren, year) commercial = hard_number_convert(commercial, "commercial", nren, year) @@ -89,10 +76,7 @@ def fetch_funding_excel_data(): # process the data (e.g. save to database) if nren is not None: - yield (nren.upper(), year, client_institution, - european_funding, - gov_public_bodies, - commercial, other) + yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other) def create_points_for_year_from_2018(start_row, end_row, year, col_start): for row in range(start_row, end_row): @@ -112,10 +96,7 @@ def fetch_funding_excel_data(): # process the data (e.g. save to database) if nren is not None: - yield (nren.upper(), year, client_institution, - european_funding, - gov_public_bodies, - commercial, other) + yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other) # For 2016 yield from create_points_for_year_until_2017(8, 50, 2016, 43, 45) @@ -147,26 +128,22 @@ def fetch_charging_structure_excel_data(): # extract the data from the row nren = ws.cell(row=row, column=col_start).value charging_structure = ws.cell(row=row, column=col_start + 1).value - logger.info( - f'NREN: {nren}, Charging Structure: {charging_structure},' - f' Year: {year}') + logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}') if charging_structure is not None: if "do not charge" in charging_structure: - charging_structure = FeeType.no_charge.value + charging_structure = FeeType.no_charge elif "combination" in charging_structure: - charging_structure = FeeType.combination.value + charging_structure = FeeType.combination elif "flat" in charging_structure: - charging_structure = FeeType.flat_fee.value + charging_structure = FeeType.flat_fee elif "usage-based" in charging_structure: - charging_structure = FeeType.usage_based_fee.value + charging_structure = FeeType.usage_based_fee elif "Other" in charging_structure: - charging_structure = FeeType.other.value + charging_structure = FeeType.other else: charging_structure = None - logger.info( - f'NREN: {nren}, Charging Structure: {charging_structure},' - f' Year: {year}') + logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}') yield nren.upper(), year, charging_structure diff --git a/compendium_v2/config.py b/compendium_v2/config.py index 30ee6388..5cf1a69b 100644 --- a/compendium_v2/config.py +++ b/compendium_v2/config.py @@ -1,32 +1,13 @@ import json - import jsonschema + CONFIG_SCHEMA = { '$schema': 'http://json-schema.org/draft-07/schema#', 'type': 'object', - - 'definitions': { - 'database-uri': { - 'type': 'string' - } - }, - 'properties': { - 'SQLALCHEMY_DATABASE_URI': { - 'type': 'string', - 'properties': { - 'database-uri': {'$ref': '#definitions/database-uri'} - }, - 'additionalProperties': False - }, - 'SURVEY_DATABASE_URI': { - 'type': 'string', - 'properties': { - 'database-uri': {'$ref': '#definitions/database-uri'} - }, - 'additionalProperties': False - } + 'SQLALCHEMY_DATABASE_URI': {'type': 'string', 'format': 'database-uri'}, + 'SURVEY_DATABASE_URI': {'type': 'string', 'format': 'database-uri'} }, 'required': ['SQLALCHEMY_DATABASE_URI', 'SURVEY_DATABASE_URI'], 'additionalProperties': False diff --git a/compendium_v2/db/model.py b/compendium_v2/db/model.py index d1b04a0a..ec029b99 100644 --- a/compendium_v2/db/model.py +++ b/compendium_v2/db/model.py @@ -1,12 +1,10 @@ +import enum import logging -import sqlalchemy as sa -from enum import Enum - from typing import Any -from sqlalchemy import MetaData -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy import Column, Enum, Integer, MetaData, Numeric, String +from sqlalchemy.orm import relationship, declarative_base +from sqlalchemy.schema import ForeignKey logger = logging.getLogger(__name__) @@ -26,34 +24,31 @@ base_schema: Any = declarative_base(metadata=metadata_obj) class NREN(base_schema): __tablename__ = 'nren' - id = sa.Column(sa.Integer, primary_key=True) - name = sa.Column(sa.String(128), nullable=False) + id = Column(Integer, primary_key=True) + name = Column(String(128), nullable=False) class BudgetEntry(base_schema): __tablename__ = 'budgets' - nren_id = sa.Column( - sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False) + year = Column(Integer, primary_key=True) + budget = Column(Numeric(asdecimal=False), nullable=False) class FundingSource(base_schema): __tablename__ = 'funding_source' - nren_id = sa.Column( - sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - client_institutions = sa.Column( - sa.Numeric(asdecimal=False), nullable=False) - european_funding = sa.Column(sa.Numeric(asdecimal=False), nullable=False) - gov_public_bodies = sa.Column(sa.Numeric(asdecimal=False), nullable=False) - commercial = sa.Column(sa.Numeric(asdecimal=False), nullable=False) - other = sa.Column(sa.Numeric(asdecimal=False), nullable=False) + year = Column(Integer, primary_key=True) + client_institutions = Column(Numeric(asdecimal=False), nullable=False) + european_funding = Column(Numeric(asdecimal=False), nullable=False) + gov_public_bodies = Column(Numeric(asdecimal=False), nullable=False) + commercial = Column(Numeric(asdecimal=False), nullable=False) + other = Column(Numeric(asdecimal=False), nullable=False) -class FeeType(Enum): +class FeeType(enum.Enum): flat_fee = "flat_fee" usage_based_fee = "usage_based_fee" combination = "combination" @@ -63,47 +58,43 @@ class FeeType(Enum): class ChargingStructure(base_schema): __tablename__ = 'charging_structure' - nren_id = sa.Column( - sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - fee_type = sa.Column('fee_type', sa.Enum("flat_fee", "usage_based_fee", - "combination", "no_charge", - "other", - name="fee_type"), nullable=True) + year = Column(Integer, primary_key=True) + fee_type = Column('fee_type', Enum(FeeType, name="fee_type"), nullable=True) class NrenStaff(base_schema): __tablename__ = 'nren_staff' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - permanent_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False) - subcontracted_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False) - technical_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False) - non_technical_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False) + year = Column(Integer, primary_key=True) + permanent_fte = Column(Numeric(asdecimal=False), nullable=False) + subcontracted_fte = Column(Numeric(asdecimal=False), nullable=False) + technical_fte = Column(Numeric(asdecimal=False), nullable=False) + non_technical_fte = Column(Numeric(asdecimal=False), nullable=False) class ParentOrganization(base_schema): __tablename__ = 'parent_organization' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - organization = sa.Column(sa.String(128), nullable=False) + year = Column(Integer, primary_key=True) + organization = Column(String(128), nullable=False) class SubOrganization(base_schema): __tablename__ = 'sub_organization' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - organization = sa.Column(sa.String(128), primary_key=True) - role = sa.Column(sa.String(128), nullable=False) + year = Column(Integer, primary_key=True) + organization = Column(String(128), primary_key=True) + role = Column(String(128), nullable=False) class ECProject(base_schema): __tablename__ = 'ec_project' - nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True) + nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True) nren = relationship(NREN, lazy='joined') - year = sa.Column(sa.Integer, primary_key=True) - project = sa.Column(sa.String(256), primary_key=True) + year = Column(Integer, primary_key=True) + project = Column(String(256), primary_key=True) diff --git a/compendium_v2/environment.py b/compendium_v2/environment.py index 55cfae65..527687c4 100644 --- a/compendium_v2/environment.py +++ b/compendium_v2/environment.py @@ -7,8 +7,7 @@ LOGGING_DEFAULT_CONFIG = { 'disable_existing_loggers': False, 'formatters': { 'simple': { - 'format': '%(asctime)s - %(name)s ' - '(%(lineno)d) - %(levelname)s - %(message)s' + 'format': '%(asctime)s - %(name)s (%(lineno)d) - %(levelname)s - %(message)s' } }, diff --git a/compendium_v2/migrations/env.py b/compendium_v2/migrations/env.py index 5ae69c65..0307be33 100644 --- a/compendium_v2/migrations/env.py +++ b/compendium_v2/migrations/env.py @@ -4,7 +4,7 @@ from sqlalchemy import engine_from_config from sqlalchemy import pool from alembic import context -from compendium_v2.db.model import base_schema +from compendium_v2.db.model import metadata_obj # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -18,7 +18,7 @@ logging.basicConfig(level=logging.INFO) # for 'autogenerate' support # from myapp import mymodel # target_metadata = mymodel.Base.metadata -target_metadata = base_schema.metadata +target_metadata = metadata_obj # other values from the config, defined by the needs of env.py, # can be acquired: diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py index 6334fae3..00786be6 100644 --- a/compendium_v2/publishers/survey_publisher_2022.py +++ b/compendium_v2/publishers/survey_publisher_2022.py @@ -354,15 +354,15 @@ def transfer_charging_structure(): continue if "do not charge" in value: - charging_structure = FeeType.no_charge.value + charging_structure = FeeType.no_charge elif "combination" in value: - charging_structure = FeeType.combination.value + charging_structure = FeeType.combination elif "flat" in value: - charging_structure = FeeType.flat_fee.value + charging_structure = FeeType.flat_fee elif "usage-based" in value: - charging_structure = FeeType.usage_based_fee.value + charging_structure = FeeType.usage_based_fee elif "Other" in value: - charging_structure = FeeType.other.value + charging_structure = FeeType.other else: charging_structure = None diff --git a/compendium_v2/routes/charging.py b/compendium_v2/routes/charging.py index eefeb017..0b2c2384 100644 --- a/compendium_v2/routes/charging.py +++ b/compendium_v2/routes/charging.py @@ -57,7 +57,7 @@ def charging_structure_view() -> Any: return { 'NREN': entry.nren.name, 'YEAR': int(entry.year), - 'FEE_TYPE': entry.fee_type, + 'FEE_TYPE': entry.fee_type.value if entry.fee_type is not None else None, } with db.session_scope() as session: diff --git a/compendium_v2/survey_db/model.py b/compendium_v2/survey_db/model.py index 45c27ddf..fc455f23 100644 --- a/compendium_v2/survey_db/model.py +++ b/compendium_v2/survey_db/model.py @@ -1,10 +1,10 @@ import logging -import sqlalchemy as sa from typing import Any -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import relationship +from sqlalchemy import Column, Integer, String +from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy.schema import ForeignKey logger = logging.getLogger(__name__) @@ -14,17 +14,16 @@ base_schema: Any = declarative_base() class Budgets(base_schema): __tablename__ = 'budgets' - id = sa.Column(sa.Integer, primary_key=True) - budget = sa.Column(sa.String) - year = sa.Column(sa.Integer) - country_code = sa.Column('country_code', sa.String, - sa.ForeignKey('nrens.country_code')) + id = Column(Integer, primary_key=True) + budget = Column(String) + year = Column(Integer) + country_code = Column('country_code', String, ForeignKey('nrens.country_code')) nren = relationship('Nrens', back_populates='budgets') class Nrens(base_schema): __tablename__ = 'nrens' - id = sa.Column(sa.Integer, primary_key=True) - abbreviation = sa.Column(sa.String) - country_code = sa.Column(sa.String) + id = Column(Integer, primary_key=True) + abbreviation = Column(String) + country_code = Column(String) budgets = relationship('Budgets', back_populates='nren') diff --git a/requirements.txt b/requirements.txt index 782ca2f8..04245106 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ flask-cors psycopg2-binary cryptography SQLAlchemy +openpyxl pytest pytest-mock python-dotenv @@ -18,7 +19,6 @@ mypy types-docutils types-jsonschema types-Flask-Cors +types-openpyxl types-setuptools types-sqlalchemy - -openpyxl diff --git a/test/test_survey_publisher_2022.py b/test/test_survey_publisher_2022.py index 0ec89e6e..53b09e65 100644 --- a/test/test_survey_publisher_2022.py +++ b/test/test_survey_publisher_2022.py @@ -264,11 +264,11 @@ def test_publisher(client, mocker, dummy_config): model.ChargingStructure.nren_id.asc()).all() assert len(charging_structures) == 3 assert charging_structures[0].nren.name.lower() == 'nren1' - assert charging_structures[0].fee_type == 'no_charge' + assert charging_structures[0].fee_type == model.FeeType.no_charge assert charging_structures[1].nren.name.lower() == 'nren2' - assert charging_structures[1].fee_type == 'usage_based_fee' + assert charging_structures[1].fee_type == model.FeeType.usage_based_fee assert charging_structures[2].nren.name.lower() == 'nren3' - assert charging_structures[2].fee_type == 'other' + assert charging_structures[2].fee_type == model.FeeType.other _ec_data = session.query(model.ECProject).order_by( model.ECProject.nren_id.asc()).all() -- GitLab