From ec8191540280de3822df15982b990414b2486259 Mon Sep 17 00:00:00 2001
From: Remco Tukker <remco.tukker@geant.org>
Date: Thu, 27 Apr 2023 21:01:51 +0200
Subject: [PATCH] first batch of fixed python code quality issues

---
 .../background_task/parse_excel_data.py       | 51 ++++--------
 compendium_v2/config.py                       | 25 +-----
 compendium_v2/db/model.py                     | 81 +++++++++----------
 compendium_v2/environment.py                  |  3 +-
 compendium_v2/migrations/env.py               |  4 +-
 .../publishers/survey_publisher_2022.py       | 10 +--
 compendium_v2/routes/charging.py              |  2 +-
 compendium_v2/survey_db/model.py              | 21 +++--
 requirements.txt                              |  4 +-
 test/test_survey_publisher_2022.py            |  6 +-
 10 files changed, 77 insertions(+), 130 deletions(-)

diff --git a/compendium_v2/background_task/parse_excel_data.py b/compendium_v2/background_task/parse_excel_data.py
index d599f477..ef8e2cd6 100644
--- a/compendium_v2/background_task/parse_excel_data.py
+++ b/compendium_v2/background_task/parse_excel_data.py
@@ -1,6 +1,6 @@
+import logging
 import openpyxl
 import os
-import logging
 
 from compendium_v2.db.model import FeeType
 from compendium_v2.environment import setup_logging
@@ -9,9 +9,7 @@ setup_logging()
 
 logger = logging.getLogger(__name__)
 
-EXCEL_FILE = os.path.join(
-    os.path.dirname(__file__), "xlsx",
-    "2021_Organisation_DataSeries.xlsx")
+EXCEL_FILE = os.path.join(os.path.dirname(__file__), "xlsx", "2021_Organisation_DataSeries.xlsx")
 
 
 def fetch_budget_excel_data():
@@ -32,12 +30,7 @@ def fetch_budget_excel_data():
             if budget is not None:
                 budget = round(budget / 1000000, 2)
                 if budget > 200:
-                    logger.info(
-                        f'{nren} has budget set to '
-                        f'>200M EUR for {year}. ({budget})')
-
-                # process the data (e.g. save to database)
-                # print(f"NREN: {nren}, Budget: {budget}, Year: {year}")
+                    logger.info(f'{nren} has budget set to >200M EUR for {year}. ({budget})')
 
                 yield nren.upper(), budget, year
 
@@ -52,17 +45,12 @@ def fetch_funding_excel_data():
 
     def hard_number_convert(s, source_name, nren, year):
         if s is None:
-            logger.info(
-                f'Invalid Value :{nren} has empty value for {source_name}.'
-                + f'for year ({year})')
+            logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year})')
             return float(0)
-        """ Returns True if string is a number. """
         try:
             return float(s)
         except ValueError:
-            logger.info(
-                f'Invalid Value :{nren} has empty value for {source_name}.'
-                + f'for year ({year}) with value ({s})')
+            logger.info(f'Invalid Value :{nren} has empty value for {source_name} for year ({year}) with value ({s})')
             return float(0)
 
     # iterate over the rows in the worksheet
@@ -76,7 +64,6 @@ def fetch_funding_excel_data():
             gov_public_bodies = ws.cell(row=row, column=col_start + 3).value
             other_european_funding = ws.cell(row=row, column=col_start + 4).value
             other = ws.cell(row=row, column=col_start + 5).value
-            print(nren, client_institution, commercial, geant_subsidy, gov_public_bodies, other_european_funding, other)
 
             client_institution = hard_number_convert(client_institution, "client institution", nren, year)
             commercial = hard_number_convert(commercial, "commercial", nren, year)
@@ -89,10 +76,7 @@ def fetch_funding_excel_data():
 
             # process the data (e.g. save to database)
             if nren is not None:
-                yield (nren.upper(), year, client_institution,
-                       european_funding,
-                       gov_public_bodies,
-                       commercial, other)
+                yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
 
     def create_points_for_year_from_2018(start_row, end_row, year, col_start):
         for row in range(start_row, end_row):
@@ -112,10 +96,7 @@ def fetch_funding_excel_data():
 
             # process the data (e.g. save to database)
             if nren is not None:
-                yield (nren.upper(), year, client_institution,
-                       european_funding,
-                       gov_public_bodies,
-                       commercial, other)
+                yield (nren.upper(), year, client_institution, european_funding, gov_public_bodies, commercial, other)
 
     # For 2016
     yield from create_points_for_year_until_2017(8, 50, 2016, 43, 45)
@@ -147,26 +128,22 @@ def fetch_charging_structure_excel_data():
             # extract the data from the row
             nren = ws.cell(row=row, column=col_start).value
             charging_structure = ws.cell(row=row, column=col_start + 1).value
-            logger.info(
-                f'NREN: {nren}, Charging Structure: {charging_structure},'
-                f' Year: {year}')
+            logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
             if charging_structure is not None:
                 if "do not charge" in charging_structure:
-                    charging_structure = FeeType.no_charge.value
+                    charging_structure = FeeType.no_charge
                 elif "combination" in charging_structure:
-                    charging_structure = FeeType.combination.value
+                    charging_structure = FeeType.combination
                 elif "flat" in charging_structure:
-                    charging_structure = FeeType.flat_fee.value
+                    charging_structure = FeeType.flat_fee
                 elif "usage-based" in charging_structure:
-                    charging_structure = FeeType.usage_based_fee.value
+                    charging_structure = FeeType.usage_based_fee
                 elif "Other" in charging_structure:
-                    charging_structure = FeeType.other.value
+                    charging_structure = FeeType.other
                 else:
                     charging_structure = None
 
-                logger.info(
-                    f'NREN: {nren}, Charging Structure: {charging_structure},'
-                    f' Year: {year}')
+                logger.info(f'NREN: {nren}, Charging Structure: {charging_structure}, Year: {year}')
 
                 yield nren.upper(), year, charging_structure
 
diff --git a/compendium_v2/config.py b/compendium_v2/config.py
index 30ee6388..5cf1a69b 100644
--- a/compendium_v2/config.py
+++ b/compendium_v2/config.py
@@ -1,32 +1,13 @@
 import json
-
 import jsonschema
 
+
 CONFIG_SCHEMA = {
     '$schema': 'http://json-schema.org/draft-07/schema#',
     'type': 'object',
-
-    'definitions': {
-        'database-uri': {
-            'type': 'string'
-        }
-    },
-
     'properties': {
-        'SQLALCHEMY_DATABASE_URI': {
-            'type': 'string',
-            'properties': {
-                'database-uri': {'$ref': '#definitions/database-uri'}
-            },
-            'additionalProperties': False
-        },
-        'SURVEY_DATABASE_URI': {
-            'type': 'string',
-            'properties': {
-                'database-uri': {'$ref': '#definitions/database-uri'}
-            },
-            'additionalProperties': False
-        }
+        'SQLALCHEMY_DATABASE_URI': {'type': 'string', 'format': 'database-uri'},
+        'SURVEY_DATABASE_URI': {'type': 'string', 'format': 'database-uri'}
     },
     'required': ['SQLALCHEMY_DATABASE_URI', 'SURVEY_DATABASE_URI'],
     'additionalProperties': False
diff --git a/compendium_v2/db/model.py b/compendium_v2/db/model.py
index d1b04a0a..ec029b99 100644
--- a/compendium_v2/db/model.py
+++ b/compendium_v2/db/model.py
@@ -1,12 +1,10 @@
+import enum
 import logging
-import sqlalchemy as sa
-from enum import Enum
-
 from typing import Any
 
-from sqlalchemy import MetaData
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy import Column, Enum, Integer, MetaData, Numeric, String
+from sqlalchemy.orm import relationship, declarative_base
+from sqlalchemy.schema import ForeignKey
 
 logger = logging.getLogger(__name__)
 
@@ -26,34 +24,31 @@ base_schema: Any = declarative_base(metadata=metadata_obj)
 
 class NREN(base_schema):
     __tablename__ = 'nren'
-    id = sa.Column(sa.Integer, primary_key=True)
-    name = sa.Column(sa.String(128), nullable=False)
+    id = Column(Integer, primary_key=True)
+    name = Column(String(128), nullable=False)
 
 
 class BudgetEntry(base_schema):
     __tablename__ = 'budgets'
-    nren_id = sa.Column(
-        sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    budget = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
+    year = Column(Integer, primary_key=True)
+    budget = Column(Numeric(asdecimal=False), nullable=False)
 
 
 class FundingSource(base_schema):
     __tablename__ = 'funding_source'
-    nren_id = sa.Column(
-        sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    client_institutions = sa.Column(
-        sa.Numeric(asdecimal=False), nullable=False)
-    european_funding = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
-    gov_public_bodies = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
-    commercial = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
-    other = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
+    year = Column(Integer, primary_key=True)
+    client_institutions = Column(Numeric(asdecimal=False), nullable=False)
+    european_funding = Column(Numeric(asdecimal=False), nullable=False)
+    gov_public_bodies = Column(Numeric(asdecimal=False), nullable=False)
+    commercial = Column(Numeric(asdecimal=False), nullable=False)
+    other = Column(Numeric(asdecimal=False), nullable=False)
 
 
-class FeeType(Enum):
+class FeeType(enum.Enum):
     flat_fee = "flat_fee"
     usage_based_fee = "usage_based_fee"
     combination = "combination"
@@ -63,47 +58,43 @@ class FeeType(Enum):
 
 class ChargingStructure(base_schema):
     __tablename__ = 'charging_structure'
-    nren_id = sa.Column(
-        sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    fee_type = sa.Column('fee_type', sa.Enum("flat_fee", "usage_based_fee",
-                                             "combination", "no_charge",
-                                             "other",
-                                             name="fee_type"), nullable=True)
+    year = Column(Integer, primary_key=True)
+    fee_type = Column('fee_type', Enum(FeeType, name="fee_type"), nullable=True)
 
 
 class NrenStaff(base_schema):
     __tablename__ = 'nren_staff'
-    nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    permanent_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
-    subcontracted_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
-    technical_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
-    non_technical_fte = sa.Column(sa.Numeric(asdecimal=False), nullable=False)
+    year = Column(Integer, primary_key=True)
+    permanent_fte = Column(Numeric(asdecimal=False), nullable=False)
+    subcontracted_fte = Column(Numeric(asdecimal=False), nullable=False)
+    technical_fte = Column(Numeric(asdecimal=False), nullable=False)
+    non_technical_fte = Column(Numeric(asdecimal=False), nullable=False)
 
 
 class ParentOrganization(base_schema):
     __tablename__ = 'parent_organization'
-    nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    organization = sa.Column(sa.String(128), nullable=False)
+    year = Column(Integer, primary_key=True)
+    organization = Column(String(128), nullable=False)
 
 
 class SubOrganization(base_schema):
     __tablename__ = 'sub_organization'
-    nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    organization = sa.Column(sa.String(128), primary_key=True)
-    role = sa.Column(sa.String(128), nullable=False)
+    year = Column(Integer, primary_key=True)
+    organization = Column(String(128), primary_key=True)
+    role = Column(String(128), nullable=False)
 
 
 class ECProject(base_schema):
     __tablename__ = 'ec_project'
-    nren_id = sa.Column(sa.Integer, sa.schema.ForeignKey(NREN.id), primary_key=True)
+    nren_id = Column(Integer, ForeignKey(NREN.id), primary_key=True)
     nren = relationship(NREN, lazy='joined')
-    year = sa.Column(sa.Integer, primary_key=True)
-    project = sa.Column(sa.String(256), primary_key=True)
+    year = Column(Integer, primary_key=True)
+    project = Column(String(256), primary_key=True)
diff --git a/compendium_v2/environment.py b/compendium_v2/environment.py
index 55cfae65..527687c4 100644
--- a/compendium_v2/environment.py
+++ b/compendium_v2/environment.py
@@ -7,8 +7,7 @@ LOGGING_DEFAULT_CONFIG = {
     'disable_existing_loggers': False,
     'formatters': {
         'simple': {
-            'format': '%(asctime)s - %(name)s '
-                      '(%(lineno)d) - %(levelname)s - %(message)s'
+            'format': '%(asctime)s - %(name)s (%(lineno)d) - %(levelname)s - %(message)s'
         }
     },
 
diff --git a/compendium_v2/migrations/env.py b/compendium_v2/migrations/env.py
index 5ae69c65..0307be33 100644
--- a/compendium_v2/migrations/env.py
+++ b/compendium_v2/migrations/env.py
@@ -4,7 +4,7 @@ from sqlalchemy import engine_from_config
 from sqlalchemy import pool
 
 from alembic import context
-from compendium_v2.db.model import base_schema
+from compendium_v2.db.model import metadata_obj
 
 # this is the Alembic Config object, which provides
 # access to the values within the .ini file in use.
@@ -18,7 +18,7 @@ logging.basicConfig(level=logging.INFO)
 # for 'autogenerate' support
 # from myapp import mymodel
 # target_metadata = mymodel.Base.metadata
-target_metadata = base_schema.metadata
+target_metadata = metadata_obj
 
 # other values from the config, defined by the needs of env.py,
 # can be acquired:
diff --git a/compendium_v2/publishers/survey_publisher_2022.py b/compendium_v2/publishers/survey_publisher_2022.py
index 6334fae3..00786be6 100644
--- a/compendium_v2/publishers/survey_publisher_2022.py
+++ b/compendium_v2/publishers/survey_publisher_2022.py
@@ -354,15 +354,15 @@ def transfer_charging_structure():
                 continue
 
             if "do not charge" in value:
-                charging_structure = FeeType.no_charge.value
+                charging_structure = FeeType.no_charge
             elif "combination" in value:
-                charging_structure = FeeType.combination.value
+                charging_structure = FeeType.combination
             elif "flat" in value:
-                charging_structure = FeeType.flat_fee.value
+                charging_structure = FeeType.flat_fee
             elif "usage-based" in value:
-                charging_structure = FeeType.usage_based_fee.value
+                charging_structure = FeeType.usage_based_fee
             elif "Other" in value:
-                charging_structure = FeeType.other.value
+                charging_structure = FeeType.other
             else:
                 charging_structure = None
 
diff --git a/compendium_v2/routes/charging.py b/compendium_v2/routes/charging.py
index eefeb017..0b2c2384 100644
--- a/compendium_v2/routes/charging.py
+++ b/compendium_v2/routes/charging.py
@@ -57,7 +57,7 @@ def charging_structure_view() -> Any:
         return {
             'NREN': entry.nren.name,
             'YEAR': int(entry.year),
-            'FEE_TYPE': entry.fee_type,
+            'FEE_TYPE': entry.fee_type.value if entry.fee_type is not None else None,
         }
 
     with db.session_scope() as session:
diff --git a/compendium_v2/survey_db/model.py b/compendium_v2/survey_db/model.py
index 45c27ddf..fc455f23 100644
--- a/compendium_v2/survey_db/model.py
+++ b/compendium_v2/survey_db/model.py
@@ -1,10 +1,10 @@
 import logging
-import sqlalchemy as sa
 
 from typing import Any
 
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship
+from sqlalchemy import Column, Integer, String
+from sqlalchemy.orm import declarative_base, relationship
+from sqlalchemy.schema import ForeignKey
 
 logger = logging.getLogger(__name__)
 
@@ -14,17 +14,16 @@ base_schema: Any = declarative_base()
 
 class Budgets(base_schema):
     __tablename__ = 'budgets'
-    id = sa.Column(sa.Integer, primary_key=True)
-    budget = sa.Column(sa.String)
-    year = sa.Column(sa.Integer)
-    country_code = sa.Column('country_code', sa.String,
-                             sa.ForeignKey('nrens.country_code'))
+    id = Column(Integer, primary_key=True)
+    budget = Column(String)
+    year = Column(Integer)
+    country_code = Column('country_code', String, ForeignKey('nrens.country_code'))
     nren = relationship('Nrens', back_populates='budgets')
 
 
 class Nrens(base_schema):
     __tablename__ = 'nrens'
-    id = sa.Column(sa.Integer, primary_key=True)
-    abbreviation = sa.Column(sa.String)
-    country_code = sa.Column(sa.String)
+    id = Column(Integer, primary_key=True)
+    abbreviation = Column(String)
+    country_code = Column(String)
     budgets = relationship('Budgets', back_populates='nren')
diff --git a/requirements.txt b/requirements.txt
index 782ca2f8..04245106 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ flask-cors
 psycopg2-binary
 cryptography
 SQLAlchemy
+openpyxl
 pytest
 pytest-mock
 python-dotenv
@@ -18,7 +19,6 @@ mypy
 types-docutils
 types-jsonschema
 types-Flask-Cors
+types-openpyxl
 types-setuptools
 types-sqlalchemy
-
-openpyxl
diff --git a/test/test_survey_publisher_2022.py b/test/test_survey_publisher_2022.py
index 0ec89e6e..53b09e65 100644
--- a/test/test_survey_publisher_2022.py
+++ b/test/test_survey_publisher_2022.py
@@ -264,11 +264,11 @@ def test_publisher(client, mocker, dummy_config):
             model.ChargingStructure.nren_id.asc()).all()
         assert len(charging_structures) == 3
         assert charging_structures[0].nren.name.lower() == 'nren1'
-        assert charging_structures[0].fee_type == 'no_charge'
+        assert charging_structures[0].fee_type == model.FeeType.no_charge
         assert charging_structures[1].nren.name.lower() == 'nren2'
-        assert charging_structures[1].fee_type == 'usage_based_fee'
+        assert charging_structures[1].fee_type == model.FeeType.usage_based_fee
         assert charging_structures[2].nren.name.lower() == 'nren3'
-        assert charging_structures[2].fee_type == 'other'
+        assert charging_structures[2].fee_type == model.FeeType.other
 
         _ec_data = session.query(model.ECProject).order_by(
             model.ECProject.nren_id.asc()).all()
-- 
GitLab