Merge branch 'feature/DBOARD3-900-reproduce-error-report' into 'develop'

Feature/DBOARD3-900 Reproduce error report See merge request live-projects/brian-polling-manager!3

Merge branch 'feature/DBOARD3-900-reproduce-error-report' into 'develop'
1303e83b · Pelle Koster · 435b524f · 255a207c · 1303e83b · 1303e83b
Commit 1303e83b authored 1 year ago by Pelle Koster
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ coverage.xml
 .coverage
 htmlcov
 docs/build
-*.log
\ No newline at end of file
+*.log
+dist/
\ No newline at end of file
--- a/MANIFEST.in
+++ b/MANIFEST.in
 include brian_polling_manager/logging_default_config.json
+global-include *.jinja2
\ No newline at end of file
--- a/brian_polling_manager/error_report/__init__.py
+++ b/brian_polling_manager/error_report/__init__.py
--- a/brian_polling_manager/error_report/cli.py
+++ b/brian_polling_manager/error_report/cli.py
+"""
+Report Interface Errors
+=======================
+
+CLI tool for generating an interface error report and sending it by email to the OC.
+
+Every day an email may be sent that summarizes interface errors for all (*) GEANT
+routers and other network devices. The error report is sent as an html attachment to
+that email. First all relevant routers and interfaces are requested from Inventory
+Provider. Then InfluxDB is queried for the latest, and yesterday's, error measurement
+points. For every interface, the latest error counts are compared against yesterday's
+error count to determine whether it has suffered new errors. Currently the following
+errors are checked:
+
+    * ``framing-errors``
+    * ``bit-error-seconds``
+    * ``errored-blocks-seconds``
+    * ``input-crc-errors``
+    * ``input-total-errors``
+    * ``input-discards``
+    * ``input-drops``
+    * ``output-drops``
+
+Every interface with new errors is added to a summary report. This report is then
+sent to the OC.
+
+.. code-block:: bash
+
+    Usage: report-interface-errors [OPTIONS]
+
+    Options:
+      --config PATH     Path to a config file for this tool. The schema this config
+                        file must adhere to can be found in
+                        ``brian_polling_manager.error_report.config.ERROR_REPORT_CONFIG_SCHEMA``
+
+[2024-04-09] This tool is the successor of a bash script that was used before. That
+script had some peculiarities in its output, and this new version mimics the
+output of the earlier tool as much as possible.
+
+*) There are some rules which routers/interfaces to include and exclude. See the
+`get_relevant_interfaces`_ function for more details.
+"""
+
+from datetime import datetime
+import json
+import logging
+import os
+import pathlib
+from typing import Sequence
+from brian_polling_manager.interface_stats.services import influx_client
+from brian_polling_manager.inventory import load_interfaces
+import click
+from influxdb import InfluxDBClient
+from brian_polling_manager.error_report.config import load
+from brian_polling_manager.error_report.report import (
+    render_email,
+    render_html,
+    send_email,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# The error field names in the influx query vs their reporting name
+# (`get_error_points` selects ``last(*)``, hence the ``last_`` prefix of the keys)
+ERROR_FIELDS = {
+    "last_input_framing_errors": "framing-errors",
+    "last_bit_error_seconds": "bit-error-seconds",
+    "last_errored_blocks_seconds": "errored-blocks-seconds",
+    "last_input_crc_errors": "input-crc-errors",
+    "last_input_total_errors": "input-total-errors",
+    "last_input_discards": "input-discards",
+    "last_input_drops": "input-drops",
+    "last_output_drops": "output-drops",
+}
+
+# Time predicates that are interpolated into the WHERE clause of the influx query in
+# `get_error_points`: "today" is the last day, "yesterday" is the day before that
+INFLUX_TIME_WINDOW_TODAY = "time > now() - 1d"
+INFLUX_TIME_WINDOW_YESTERDAY = "time < now() - 1d and time > now() - 2d"
+
+# JSON schema of the data structure that is returned by `interface_errors` and that
+# is rendered into the report: a list of interfaces to report on (error counters and
+# an optional ``diff``) and a list of excluded interfaces (error counters only)
+PROCESSED_ERROR_COUNTERS_SCHEMA = {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "definitions": {
+        "error_counters_content": {
+            "type": "object",
+            "properties": {
+                "framing-errors": {"type": "integer"},
+                "bit-error-seconds": {"type": "integer"},
+                "errored-blocks-seconds": {"type": "integer"},
+                "input-crc-errors": {"type": "integer"},
+                "input-total-errors": {"type": "integer"},
+                "input-discards": {"type": "integer"},
+                "input-drops": {"type": "integer"},
+                "output-drops": {"type": "integer"},
+            },
+            "additionalProperties": False,
+        },
+        "interface_error_counters": {
+            "type": "object",
+            "properties": {
+                "router": {"type": "string"},
+                "interface": {"type": "string"},
+                "description": {"type": "string"},
+                "error_counters": {"$ref": "#/definitions/error_counters_content"},
+                "diff": {"$ref": "#/definitions/error_counters_content"},
+            },
+            "required": [
+                "router",
+                "interface",
+                "description",
+                "error_counters",
+            ],
+            "additionalProperties": False,
+        },
+        "excluded_interface_error_counters": {
+            "type": "object",
+            "properties": {
+                "router": {"type": "string"},
+                "interface": {"type": "string"},
+                "description": {"type": "string"},
+                "error_counters": {"$ref": "#/definitions/error_counters_content"},
+            },
+            "required": [
+                "router",
+                "interface",
+                "description",
+                "error_counters",
+            ],
+            "additionalProperties": False,
+        },
+    },
+    "type": "object",
+    "properties": {
+        "interfaces": {
+            "type": "array",
+            "items": {"$ref": "#/definitions/interface_error_counters"},
+        },
+        "excluded_interfaces": {
+            "type": "array",
+            "items": {"$ref": "#/definitions/excluded_interface_error_counters"},
+        },
+    },
+    "required": ["interfaces", "excluded_interfaces"],
+    "additionalProperties": False,
+}
+
+
+# Logging configuration (in ``logging.config.dictConfig`` format) that `setup_logging`
+# falls back to when the ``LOGGING_CONFIG`` environment variable is not set. It sends
+# messages of level INFO and up to stdout
+LOGGING_DEFAULT_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {"simple": {"format": "%(asctime)s - %(levelname)s - %(message)s"}},
+    "handlers": {
+        "console": {
+            "class": "logging.StreamHandler",
+            "level": "INFO",
+            "formatter": "simple",
+            "stream": "ext://sys.stdout",
+        },
+    },
+    "loggers": {
+        "brian_polling_manager": {
+            "level": "INFO",
+            "handlers": ["console"],
+            "propagate": False,
+        }
+    },
+    "root": {"level": "INFO", "handlers": ["console"]},
+}
+
+
+def setup_logging():
+    """
+    set up logging using the configured filename
+
+    if LOGGING_CONFIG is defined in the environment, use this for
+    the filename, otherwise use LOGGING_DEFAULT_CONFIG
+    """
+    logging_config = LOGGING_DEFAULT_CONFIG
+    if "LOGGING_CONFIG" in os.environ:
+        filename = os.environ["LOGGING_CONFIG"]
+        with open(filename) as f:
+            logging_config = json.loads(f.read())
+
+    logging.config.dictConfig(logging_config)
+
+
+def get_error_points(client: InfluxDBClient, time_window: str):
+    """Get the last value for every error field for every (router, interface)
+
+    :param client: an `InfluxDBCLient`_
+    :param time_window: an influx time window such as `INFLUX_TIME_WINDOW_TODAY` or
+        `INFLUX_TIME_WINDOW_YESTERDAY_
+    :returns: a dict {(router, interface): error_point }  were error_point is a dict
+        with all the error field values for that respective interface
+    """
+    raw_data = client.query(
+        # This query may actually return values from mulitple different points if
+        # some values are missing for the last point. But it's close enough.
+        (
+            f"SELECT last(*) FROM errors WHERE {time_window} "
+            "group by hostname, interface_name;"
+        )
+    )
+    return {
+        (tags["hostname"], tags["interface_name"]): next(points, {})
+        for (_, tags), points in raw_data.items()
+    }
+
+
+def select_error_fields(errors, mapping):
+    """Create a dictionary with every target key from `mapping`_ and its corresponding
+    value from the `errors`_ dictionary, or ``0`` if it doesn't exist or has a ``None``
+    value
+
+    :param errors: An error point dictionary coming from Influx
+    :param mapping: A field name mapping {source: target} (ie. `ERROR_FIELDS`) for
+        translating field names from the influx query to their error names in the report
+    :returns: A new dictionary containing all relevant error counts
+    """
+    # the extra `or 0` is for substituting None values
+    return {tgt: errors.get(src, 0) or 0 for src, tgt in mapping.items()}
+
+
+def interface_errors(
+    client: InfluxDBClient, interface_info, errors, exclusions=(), raise_on_errors=False
+):
+    """
+    Retrieves error counters from influx
+
+    :param client: InfluxDBClient for connecting to influx
+    :param interface_info: a dict of {(router, interface): info_dict} with interface
+        information coming from invprov (ie. the output from `get_relevant_interfaces`_)
+    :param errors: A dict of (input_data_field: result_field) for every error to report
+        on (see `ERROR_FIELDS`_)
+    :param raise_on_errors: raise when certain exceptions occur (useful for testing)
+
+    :result: an instance of PROCESSED_ERROR_COUNTERS_SCHEMA
+    """
+    todays_data = {
+        key: select_error_fields(val, mapping=errors)
+        for key, val in get_error_points(client, INFLUX_TIME_WINDOW_TODAY).items()
+    }
+    yesterdays_data = {
+        key: select_error_fields(val, mapping=errors)
+        for key, val in get_error_points(client, INFLUX_TIME_WINDOW_YESTERDAY).items()
+    }
+
+    result = {"interfaces": [], "excluded_interfaces": []}
+    for (router, ifc), info in interface_info.items():
+        try:
+            today = todays_data[(router, ifc)]
+        except KeyError:
+            logger.error(f"{router} - {ifc} not found in influx data")
+            if raise_on_errors:
+                raise
+            continue
+
+        if not any(err > 0 for err in today.values()):
+            # skip interfaces without any errors
+            continue
+
+        yesterday = yesterdays_data.get((router, ifc), {})
+
+        counters = {
+            "router": router,
+            "interface": ifc,
+            "error_counters": today,
+            "description": info["description"],
+        }
+
+        if not is_excluded_interface(info["description"], exclusions):
+            nonzero_errors = {err: val for err, val in today.items() if val > 0}
+            counters["error_counters"] = nonzero_errors
+
+            if any(yesterday.values()):
+                # we have existing errors
+
+                # This is strictly not the most correct way to determine differences.
+                # during the day the error count may have reset and diffs may actually
+                # be negative, but we ignore those because that is (mostly) how it was
+                # done in the orginal bash script
+                diff = {
+                    err: (val - yesterday[err])
+                    for err, val in nonzero_errors.items()
+                    if (val - yesterday[err]) > 0
+                }
+                if not diff:
+                    # Skip interface if it does not have any increased error counters
+                    continue
+
+                counters["diff"] = diff
+
+            result["interfaces"].append(counters)
+        else:
+            logger.info(f"Found excluded interface {router} - {ifc}")
+            result["excluded_interfaces"].append(counters)
+
+    return result
+
+
+def is_excluded_interface(description: str, exclusions: Sequence[str]):
+    """Some interfaces generate a lot of noise and should be excluded"""
+    # We may want to put this logic inside inventory provider
+    return any(excl.lower() in description.lower() for excl in exclusions)
+
+
+def get_relevant_interfaces(hosts):
+    """Get interface info from inventory provider. Some interfaces are considered
+    irrelevant based on their description"""
+
+    return _filter_and_sort_interfaces(load_interfaces(hosts))
+
+
+def _filter_and_sort_interfaces(interfaces):
+    # We may want to put this logic inside inventory provider and serve from a new
+    # endpoint
+    return dict(
+        sorted(
+            ((i["router"], i["name"]), i)
+            for i in interfaces
+            if all(
+                (
+                    "PHY" in i["description"].upper(),
+                    "SPARE" not in i["description"].upper(),
+                    "NON-OPERATIONAL" not in i["description"].upper(),
+                    "RESERVED" not in i["description"].upper(),
+                    "TEST" not in i["description"].upper(),
+                    "dsc." not in i["name"].lower(),
+                    "fxp" not in i["name"].lower(),
+                )
+            )
+        )
+    )
+
+
+def main(config: dict):
+    """Main function for the error reporting script
+
+    :param config: An instance of `ERROR_REPORT_CONFIG_SCHEMA`
+    """
+    logger.info(f"Retrieving interfaces from inventory provider: {config['inventory']}")
+
+    all_interfaces = get_relevant_interfaces(config["inventory"])
+    client = influx_client(config["influx"])
+    with client:
+        logger.info("Retrieving error points from influxdb...")
+        all_error_counters = interface_errors(
+            client,
+            interface_info=all_interfaces,
+            errors=ERROR_FIELDS,
+            exclusions=config["exclude-interfaces"],
+        )
+    logger.info("Generating report...")
+
+    body = render_html(
+        all_error_counters,
+        date=datetime.utcnow().strftime("%a %d %b %H:%M:%S UTC %Y"),
+    )
+    email = render_email(config["email"], html=body)
+    logger.info("Sending email...")
+    send_email(email, config=config["email"])
+    logger.info("Done!")
+
+
+@click.command()
+@click.option(
+    "-c",
+    "--config",
+    type=click.Path(
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+        readable=True,
+        path_type=pathlib.Path,
+    ),
+    help="path to a config file",
+)
+def cli(config):
+    setup_logging()
+    config = load(config_file=config)
+    main(config)
+
+
+if __name__ == "__main__":
+    cli()
--- a/brian_polling_manager/error_report/config-example.json
+++ b/brian_polling_manager/error_report/config-example.json
+{
+  "email": {
+    "from": "noreply@geant.org",
+    "reply_to": "noreply@geant.org",
+    "to": "some-bogus-email",
+    "cc": "some-cc",
+    "hostname": "some.smtp.server",
+    "username": "smtp-user",
+    "password": "smtp-password",
+    "starttls": false
+  },
+  "inventory": ["blah"],
+  "influx": {
+    "hostname": "hostname",
+    "database": "dbname",
+    "measurement": "errors",
+    "username": "some-username",
+    "password": "user-password"
+  },
+  "exclude-interfaces": [
+    "SOME DESCRIPTION PART"
+  ]
+}
--- a/brian_polling_manager/error_report/config.py
+++ b/brian_polling_manager/error_report/config.py
+import json
+import logging.config
+import pathlib
+
+import jsonschema
+
+logger = logging.getLogger(__name__)
+
+ERROR_REPORT_CONFIG_SCHEMA = {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "definitions": {
+        "email-params": {
+            "type": "object",
+            "properties": {
+                "from": {"type": "string"},
+                "reply_to": {"type": "string"},
+                "to": {"$ref": "#/definitions/string-or-array"},
+                "cc": {"$ref": "#/definitions/string-or-array"},
+                "hostname": {"type": "string"},
+                "username": {"type": "string"},
+                "password": {"type": "string"},
+                "starttls": {"type": "boolean"},
+            },
+            "required": [
+                "from",
+                "to",
+                "hostname",
+            ],
+            "additionalProperties": False,
+        },
+        "string-or-array": {
+            "oneOf": [
+                {"type": "string"},
+                {"type": "array", "items": {"type": "string"}, "minItems": 1},
+            ]
+        },
+        "influx-db-measurement": {
+            "type": "object",
+            "properties": {
+                "ssl": {"type": "boolean"},
+                "hostname": {"type": "string"},
+                "port": {"type": "integer"},
+                "username": {"type": "string"},
+                "password": {"type": "string"},
+                "database": {"type": "string"},
+                "measurement": {"type": "string"},
+            },
+            "required": [
+                # ssl, port are optional
+                "hostname",
+                "username",
+                "password",
+                "database",
+                "measurement",
+            ],
+            "additionalProperties": False,
+        },
+    },
+    "type": "object",
+    "properties": {
+        "email": {"$ref": "#/definitions/email-params"},
+        "inventory": {
+            "type": "array",
+            "items": {"type": "string", "format": "uri"},
+            "minItems": 1,
+        },
+        "influx": {"$ref": "#/definitions/influx-db-measurement"},
+        "exclude-interfaces": {
+            "type": "array",
+            "items": {"type": "string"},
+        },
+    },
+    "required": ["email", "influx"],
+    "additionalProperties": False,
+}
+
+
+def load(config_file: pathlib.Path):
+    """
+    loads, validates and returns configuration parameters
+
+    :param config_file: filename (file-like object, opened for reading)
+    :return: a dict containing configuration parameters
+    :raises: json.JSONDecodeError, jsonschema.ValidationError
+    """
+
+    config = json.loads(config_file.read_text())
+    jsonschema.validate(config, ERROR_REPORT_CONFIG_SCHEMA)
+
+    # convert str addresses into list of str
+    if isinstance(config["email"]["to"], str):
+        config["email"]["to"] = [config["email"]["to"]]
+    if isinstance(config["email"].get("cc"), str):
+        config["email"]["cc"] = [config["email"]["cc"]]
+    return config
--- a/brian_polling_manager/error_report/email.jinja2
+++ b/brian_polling_manager/error_report/email.jinja2
+{#-
+We have a lot of hardcoded stuff in here. Ideally we don't want to use jinja here
+at all but use Python's EmailMessage or MIMEBase classes to construct the email.
+However, there are a bunch of peculiarities in the email message that make it hard to
+do this, unless we can simplify the email. But for now we stick to the original email
+message format
+-#}
+From: {{ config.from }}
+Sender: {{ config.from }}
+{%- if config.reply_to is defined %}
+Reply-To: {{ config.reply_to }}
+{%- endif %}
+To: {{ config.to | join('; ') }}
+{%- if config.cc is defined %}
+Cc: {{ config.cc | join('; ') }}
+{%- endif %}
+Mime-Version: 1.0
+Subject: {{ subject }}
+X-Mailer: /home/neteam/code/juniper_errors_report/email.sh
+Content-Type: multipart/mixed; boundary="-"
+
+---
+Content-Type: text/plain; format=flowed; charset=ISO-8859-1
+Content-Disposition: inline
+Content-Transfer-Encoding: 8bit
+
+Hi,
+
+The latest errors report is attached.
+
+Regards,
+neteam@neteam-server01.geant.org:/home/neteam/code/juniper_errors_report
+
+---
+Content-Type: text/plain; name="errors_report.html"
+Content-Transfer-Encoding: base64
+Content-Disposition: inline; filename="errors_report.html"
+Content-MD5: {{ md5_hash }}
+
+{{ b64_report | wordwrap(76) }}
\ No newline at end of file
--- a/brian_polling_manager/error_report/error_report.html.jinja2
+++ b/brian_polling_manager/error_report/error_report.html.jinja2
+<html>
+<body>
+<pre>
+{#- We mix tabs with spaces and have otherwise inconsistent whitespace to keep the
+    output as close as possible to the output of the original script
+#}
+{%- for ifc in interfaces  %}
+=================================
+{{ ifc.router }}
+=================================
+	{{ ifc.interface }}	         {{ ifc.description }}
+        {%- if ifc.diff %}
+        {%- for err, diff in ifc.diff.items() %}
+		{{ err }}{{ "	" if err == "framing-errors" else "" }}	{{ ifc.error_counters[err] }}		Diff:	{{ diff }}
+        {%- endfor %}
+        {%- else %}
+        {%- for err, val in ifc.error_counters.items() %}
+		{{ err }}{{ "	" if err == "framing-errors" else "" }}	{{ val }}
+        {%- endfor %}
+        {%- endif %}
+{{ '' }}
+{%- endfor %}
+
+{%- if excluded_interfaces %}
+ROUTER,INTERFACE,FRAMING ERRORS,BIT ERROR SECONDS,ERRORED BLOCKS SECONDS,CRC ERRORS,TOTAL ERRORS,INPUT DISCARDS,INPUT DROPS,OUTPUT DROPS
+{%- for ifc in excluded_interfaces %}
+{{ifc.router}},{{ifc.interface}},{{ ifc.error_counters.values() | join(',') }},{{ifc.description}}
+{%- endfor %}
+{%- endif %}
+
+
+
+Generated {{ date }}
+</pre>
+</body>
+</html>
--- a/brian_polling_manager/error_report/report.py
+++ b/brian_polling_manager/error_report/report.py
+import logging
+import pathlib
+import smtplib
+import jinja2
+import hashlib
+import base64
+
+THIS_DIR = pathlib.Path(__file__).parent
+logger = logging.getLogger(__name__)
+
+SMTP_TIMEOUT_SECONDS = 5
+
+
+def render_html(errors, date):
+    """
+    Render error struct to email body
+
+    :param errors: an instance of `PROCESSED_ERROR_COUNTERS_SCHEMA`
+    :param template_file: a jinja2 template to render
+    """
+    env = jinja2.Environment(
+        # use CRLF since that was (explicitly) used by the original bash script, perhaps
+        # it's unnecessary
+        loader=jinja2.FileSystemLoader(THIS_DIR),
+        newline_sequence="\r\n",
+    )
+    template = env.get_template("error_report.html.jinja2")
+    return template.render(**errors, date=date)
+
+
+def render_email(
+    email_config: dict, html: str, subject="GEANT Juniper Interface Errors Report"
+):
+    env = jinja2.Environment(loader=jinja2.FileSystemLoader(THIS_DIR))
+    template = env.get_template("email.jinja2")
+    html_as_bytes = html.encode()
+    md5_hash = hashlib.md5(html_as_bytes).hexdigest()
+    b64_report = base64.b64encode(html_as_bytes).decode()
+    return template.render(
+        config=email_config, subject=subject, md5_hash=md5_hash, b64_report=b64_report
+    )
+
+
+def send_email(payload: str, config: dict):
+    with smtplib.SMTP(
+        host=config["hostname"], port=25, timeout=SMTP_TIMEOUT_SECONDS
+    ) as server:
+        if config.get("starttls"):
+            server.starttls()
+
+        if config.get("password") is not None:
+            username = config.get("username", config["from"])
+            try:
+                server.login(username, config["password"])
+            except smtplib.SMTPNotSupportedError:
+                logger.warning(
+                    "Authentication not supported, continuing without authentication. "
+                    "Unset 'email.password' in the config to suppress this message"
+                )
+
+        recipients = [*config["to"], *config.get("cc", [])]
+        server.sendmail(config["from"], recipients, payload)
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,7 @@ setup(
        'console_scripts': [
            'brian-polling-manager=brian_polling_manager.main:cli',
            'get-interface-stats=brian_polling_manager.interface_stats.cli:cli'
+            'report-interface-errors=brian_polling_manager.error_report.cli:cli'
        ]
    },
    include_package_data=True,

--- a/test/error_report/data/full-inventory.json
+++ b/test/error_report/data/full-inventory.json
--- a/test/error_report/data/small-inventory.json
+++ b/test/error_report/data/small-inventory.json
+[
+  {
+      "router": "mx1.ams.nl.geant.net",
+      "name": "ae1",
+      "bundle": [],
+      "bundle-parents": [],
+      "description": "PHY blah blah",
+      "circuits": [
+          {
+              "id": 12112,
+              "name": "something",
+              "type": "SERVICE",
+              "status": "operational"
+          }
+      ],
+      "snmp-index": 1211
+  },
+  {
+      "router": "mx1.fra.de.geant.net",
+      "name": "ae10",
+      "bundle": [],
+      "bundle-parents": [],
+      "description": "PHY blah blah foo",
+      "circuits": [
+          {
+              "id": 50028,
+              "name": "something",
+              "type": "SERVICE",
+              "status": "operational"
+          }
+      ],
+      "snmp-index": 1006
+  },
+  {
+      "router": "mx1.fra.de.geant.net",
+      "name": "ae99.1",
+      "bundle": [],
+      "bundle-parents": [],
+      "description": "SRV blah blah bar",
+      "circuits": [
+          {
+              "id": 50028,
+              "name": "something",
+              "type": "SERVICE",
+              "status": "operational"
+          }
+      ],
+      "snmp-index": 9999
+  }
+]
--- a/test/error_report/data/test_render_html-expected.html
+++ b/test/error_report/data/test_render_html-expected.html
+<html>
+<body>
+<pre>
+=================================
+mx1.ams.nl.geant.net
+=================================
+	ae1	         PHY blah blah
+		framing-errors		4		Diff:	2
+		input-drops	2		Diff:	1
+
+=================================
+mx1.fra.de.geant.net
+=================================
+	ae10	         PHY blah blah foo
+		input-drops	3
+
+
+
+
+Generated <some date>
+</pre>
+</body>
+</html>
\ No newline at end of file
--- a/test/error_report/data/test_render_html_with_exclusions-expected.html
+++ b/test/error_report/data/test_render_html_with_exclusions-expected.html
+<html>
+<body>
+<pre>
+=================================
+mx1.ams.nl.geant.net
+=================================
+	ae1	         PHY blah blah
+		input-drops	2
+
+ROUTER,INTERFACE,FRAMING ERRORS,BIT ERROR SECONDS,ERRORED BLOCKS SECONDS,CRC ERRORS,TOTAL ERRORS,INPUT DISCARDS,INPUT DROPS,OUTPUT DROPS
+mx1.fra.de.geant.net,ae10,1,2,3,4,5,6,7,8,PHY blah blah foo
+
+
+
+Generated <some date>
+</pre>
+</body>
+</html>
\ No newline at end of file
--- a/test/error_report/test_error_report.py
+++ b/test/error_report/test_error_report.py
+import base64
+import hashlib
+import json
+
+import pathlib
+import smtplib
+from unittest.mock import Mock, patch, call
+from brian_polling_manager.error_report.report import (
+    SMTP_TIMEOUT_SECONDS,
+    render_email,
+    render_html,
+    send_email,
+)
+import jsonschema
+import pytest
+from brian_polling_manager.error_report import cli, config
+from brian_polling_manager.error_report.cli import (
+    ERROR_FIELDS,
+    INFLUX_TIME_WINDOW_TODAY,
+    INFLUX_TIME_WINDOW_YESTERDAY,
+    PROCESSED_ERROR_COUNTERS_SCHEMA,
+    _filter_and_sort_interfaces,
+    get_error_points,
+    get_relevant_interfaces,
+    interface_errors,
+    is_excluded_interface,
+    select_error_fields,
+)
+
+from click.testing import CliRunner
+
+DATA_DIR = pathlib.Path(__file__).parent / "data"
+
+
+@pytest.fixture(scope="session")
+def full_inventory():
+    return json.loads(((DATA_DIR / "full-inventory.json").read_text()))
+
+
+@pytest.fixture(scope="session")
+def small_inventory():
+    return json.loads(((DATA_DIR / "small-inventory.json").read_text()))
+
+
+@pytest.fixture
+def mock_influx_client():
+    class FakeInfluxClient:
+        """
+        Fake influx client, see `create_error_point` for usage how to set it up
+        """
+
+        INFLUX_ERROR_FIELDS = {v.replace("-", "_"): k for k, v in ERROR_FIELDS.items()}
+
+        def __init__(self) -> None:
+            self.today = []
+            self.yesterday = []
+
+        def __enter__(self):
+            pass
+
+        def __exit__(self, *args, **kwargs):
+            pass
+
+        def add_point(self, hostname, interface, timestamp, payload):
+            converted_payload = {
+                self.INFLUX_ERROR_FIELDS[k]: v for k, v in payload.items()
+            }
+            point = (
+                ("errors", {"hostname": hostname, "interface_name": interface}),
+                iter([converted_payload]),
+            )
+            if timestamp == "today":
+                return self.today.append(point)
+            if timestamp == "yesterday":
+                return self.yesterday.append(point)
+            raise ValueError(
+                "'timestamp' argument must be either 'today' or 'yesterday'"
+            )
+
+        def query(self, q):
+            result = Mock()
+            if INFLUX_TIME_WINDOW_YESTERDAY in q:
+                result.items.return_value = self.yesterday
+            else:
+                result.items.return_value = self.today
+            return result
+
+    return FakeInfluxClient()
+
+
+@pytest.fixture
+def create_error_point(mock_influx_client):
+    """Fixture for creating fake influx error points.
+    :param hostname: hostname
+    :param interface: interface name
+    :param timestamp: either ``today`` or ``yesterday``
+    :param kwargs: one or more error fields with integer value:
+        ``input_framing_errors``, ``bit_error_seconds``,``errored_blocks_seconds``,
+        ``input_crc_errors``, ``input_total_errors``,``input_discards``,``input_drops``,
+        ``output_drops``
+
+    The created error points will be returned by ``mock_influx_client`` fixture
+    """
+
+    def _create(hostname, interface, timestamp, **fields):
+        mock_influx_client.add_point(hostname, interface, timestamp, payload=fields)
+        return {(hostname, interface): fields}
+
+    return _create
+
+
+@pytest.fixture
+def get_interface_errors(small_inventory, mock_influx_client):
+    interfaces = _filter_and_sort_interfaces(small_inventory)
+
+    def _get_interface_errors(**kwargs):
+        defaults = {
+            "client": mock_influx_client,
+            "interface_info": interfaces,
+            "errors": ERROR_FIELDS,
+        }
+        defaults.update(kwargs)
+        result = interface_errors(**defaults)
+        jsonschema.validate(result, PROCESSED_ERROR_COUNTERS_SCHEMA)
+        return result
+
+    return _get_interface_errors
+
+
+def test_validate_config(tmp_path):
+    config_file = tmp_path / "config.json"
+    content = {
+        "email": {
+            "from": "noreply@geant.org",
+            "reply_to": "noreply@geant.org",
+            "to": "some-bogus-email",
+            "cc": ["recipient01@geant.org", "recipient02@geant.org"],
+            "hostname": "some.smtp.server",
+            "username": "smtp-user",
+            "password": "smtp-password",
+            "starttls": False,
+        },
+        "inventory": ["blah"],
+        "influx": {
+            "hostname": "hostname",
+            "database": "dbname",
+            "measurement": "errors",
+            "username": "some-username",
+            "password": "user-password",
+        },
+        "exclude-interfaces": ["SOME DESCRIPTION PART"],
+    }
+    config_file.write_text(json.dumps(content))
+    result = config.load(config_file)
+    assert result == {
+        "email": {
+            "from": "noreply@geant.org",
+            "reply_to": "noreply@geant.org",
+            "to": ["some-bogus-email"],
+            "cc": ["recipient01@geant.org", "recipient02@geant.org"],
+            "hostname": "some.smtp.server",
+            "username": "smtp-user",
+            "password": "smtp-password",
+            "starttls": False,
+        },
+        "inventory": ["blah"],
+        "influx": {
+            "hostname": "hostname",
+            "database": "dbname",
+            "measurement": "errors",
+            "username": "some-username",
+            "password": "user-password",
+        },
+        "exclude-interfaces": ["SOME DESCRIPTION PART"],
+    }
+
+
+def test_get_relevant_interfaces(full_inventory):
+    """Check which interfaces of the full inventory are kept as relevant.
+
+    ``load_interfaces`` is patched to return the ``full_inventory`` fixture. The
+    result must be a non-empty, strict subset in size, and every kept interface
+    must have "PHY" in its description while none of the unwanted markers occur
+    in its description or name.
+    """
+    unwanted_in_description = ("SPARE", "NON-OPERATIONAL", "RESERVED", "TEST")
+    unwanted_in_name = ("dsc.", "fxp")
+
+    with patch.object(
+        cli, "load_interfaces", return_value=full_inventory
+    ) as load_mock:
+        result = get_relevant_interfaces("some-host")
+        assert load_mock.call_args == call("some-host")
+        assert 0 < len(result) < len(full_inventory)
+        for ifc in result.values():
+            description = ifc["description"].upper()
+            assert "PHY" in description
+            assert not any(word in description for word in unwanted_in_description)
+            name = ifc["name"].lower()
+            assert not any(part in name for part in unwanted_in_name)
+
+
+@pytest.mark.parametrize(
+    "description, exclusions, is_excluded",
+    [
+        # no exclusions given
+        ("DESC", [], False),
+        # exclusion identical to the description
+        ("DESC", ["DESC"], True),
+        # exclusion differs from the description only in case
+        ("DESC", ["desc"], True),
+        # exclusion is longer than the description
+        ("DESC", ["DESCMORE"], False),
+        # only the second exclusion applies
+        ("DESC", ["MORE", "DESC"], True),
+        # empty description
+        ("", ["DESC"], False),
+    ],
+)
+def test_excluded_interface(description, exclusions, is_excluded):
+    """Compare ``is_excluded_interface`` against the expected flag for each case."""
+    outcome = is_excluded_interface(description, exclusions)
+    assert outcome == is_excluded
+
+
+def test_get_error_points(mock_influx_client, create_error_point):
+    """Query today's time window and expect only the points created for "today".
+
+    The expected result is keyed by ``(router, interface)`` and holds only the
+    counters that were set on each point.
+    """
+    points = [
+        ("ifc.0", "today", {"framing_errors": 1, "input_drops": 2}),
+        ("ifc.1", "today", {"framing_errors": 3, "output_drops": 4}),
+        ("ifc.2", "yesterday", {"framing_errors": 3}),
+    ]
+    for interface, timestamp, counters in points:
+        create_error_point("some.rtr", interface, timestamp, **counters)
+
+    expected = {
+        ("some.rtr", "ifc.0"): {"last_input_framing_errors": 1, "last_input_drops": 2},
+        ("some.rtr", "ifc.1"): {"last_input_framing_errors": 3, "last_output_drops": 4},
+    }
+    assert get_error_points(mock_influx_client, INFLUX_TIME_WINDOW_TODAY) == expected
+
+
+def test_select_field_adds_missing_error_count():
+    """A mapped field absent from the input is expected to come back as 0."""
+    selected = select_error_fields({"a": 1}, {"a": "A", "b": "B"})
+    assert selected == {"A": 1, "B": 0}
+
+
+def test_select_error_fields_substitutes_none():
+    """A mapped field whose input value is ``None`` is expected to come back as 0."""
+    raw = {"a": 1, "b": None}
+    mapping = {"a": "A", "b": "B"}
+    assert select_error_fields(raw, mapping) == {"A": 1, "B": 0}
+
+
+def test_select_error_fields_skips_other_fields():
+    """Input fields that are not part of the mapping are expected to be left out."""
+    selected = select_error_fields({"a": 1, "b": 2}, {"a": "A"})
+    assert selected == {"A": 1}
+
+
+def test_interface_errors_with_new_errors(create_error_point, get_interface_errors):
+    """A point with all counters set is expected to be reported with every counter.
+
+    Only a "today" point exists for the interface; the result must list that
+    interface with all eight counters and have no excluded interfaces.
+    """
+    counters = dict(
+        framing_errors=1,
+        bit_error_seconds=2,
+        errored_blocks_seconds=3,
+        input_crc_errors=4,
+        input_total_errors=5,
+        input_discards=6,
+        input_drops=7,
+        output_drops=8,
+    )
+    create_error_point("mx1.ams.nl.geant.net", "ae1", "today", **counters)
+
+    expected_interface = {
+        "router": "mx1.ams.nl.geant.net",
+        "interface": "ae1",
+        "description": "PHY blah blah",
+        "error_counters": {
+            "framing-errors": 1,
+            "bit-error-seconds": 2,
+            "errored-blocks-seconds": 3,
+            "input-crc-errors": 4,
+            "input-total-errors": 5,
+            "input-discards": 6,
+            "input-drops": 7,
+            "output-drops": 8,
+        },
+    }
+    assert get_interface_errors() == {
+        "interfaces": [expected_interface],
+        "excluded_interfaces": [],
+    }
+
+
+def test_interface_errors_with_multiple_interfaces(
+    create_error_point, get_interface_errors
+):
+    """Two interfaces with errors are expected to be reported in the asserted order."""
+    create_error_point("mx1.ams.nl.geant.net", "ae1", "today", framing_errors=1)
+    create_error_point("mx1.fra.de.geant.net", "ae10", "today", framing_errors=2)
+
+    amsterdam = {
+        "router": "mx1.ams.nl.geant.net",
+        "interface": "ae1",
+        "description": "PHY blah blah",
+        "error_counters": {"framing-errors": 1},
+    }
+    frankfurt = {
+        "router": "mx1.fra.de.geant.net",
+        "interface": "ae10",
+        "description": "PHY blah blah foo",
+        "error_counters": {"framing-errors": 2},
+    }
+    assert get_interface_errors()["interfaces"] == [amsterdam, frankfurt]
+
+
+def test_logs_message_on_missing_error_counters_for_interface(
+    create_error_point, get_interface_errors, caplog
+):
+    """An interface for which no point was created is expected to be logged."""
+    # only ae1 on the Amsterdam router gets a point
+    create_error_point("mx1.ams.nl.geant.net", "ae1", "today", framing_errors=1)
+    get_interface_errors()
+
+    expected_message = "mx1.fra.de.geant.net - ae10 not found in influx data"
+    assert expected_message in caplog.text
+
+
+def test_does_not_check_for_logical_interfaces(get_interface_errors, caplog):
+    """No log output is expected to mention the ``ae99.1`` interface."""
+    get_interface_errors()
+    logged = caplog.text
+    assert "mx1.fra.de.geant.net - ae99.1" not in logged
+
+
+def test_skips_interface_with_0_errors(create_error_point, get_interface_errors):
+    """A point created without any counters is expected to yield no interfaces."""
+    create_error_point("mx1.ams.nl.geant.net", "ae1", "today")
+    reported = get_interface_errors()["interfaces"]
+    assert not reported
+
+
+def test_interface_errors_doesnt_include_0_errors(
+    create_error_point, get_interface_errors
+):
+    """A counter with value 0 is expected to be absent from ``error_counters``."""
+    create_error_point(
+        "mx1.ams.nl.geant.net", "ae1", "today", input_drops=1, framing_errors=0
+    )
+    first_interface = get_interface_errors()["interfaces"][0]
+
+    # framing-errors was 0, so only input-drops is expected
+    assert first_interface == {
+        "router": "mx1.ams.nl.geant.net",
+        "interface": "ae1",
+        "description": "PHY blah blah",
+        "error_counters": {"input-drops": 1},
+    }
+
+
+def test_increased_error_produces_diff(create_error_point, get_interface_errors):
+    """A counter that grew since yesterday is expected to show up under ``diff``.
+
+    ``error_counters`` is expected to hold today's value and ``diff`` the
+    difference between today's and yesterday's value.
+    """
+    for when, drops in (("yesterday", 1), ("today", 10)):
+        create_error_point("mx1.ams.nl.geant.net", "ae1", when, input_drops=drops)
+
+    first_interface = get_interface_errors()["interfaces"][0]
+    assert first_interface == {
+        "router": "mx1.ams.nl.geant.net",
+        "interface": "ae1",
+        "description": "PHY blah blah",
+        "error_counters": {"input-drops": 10},
+        "diff": {"input-drops": 9},
+    }
+
+
+def test_unchanged_errors_do_not_show_up_in_diff(
+    create_error_point, get_interface_errors
+):
+    """Only the counter that changed since yesterday is expected under ``diff``."""
+    router, interface = "mx1.ams.nl.geant.net", "ae1"
+    # framing_errors stays at 2, input_drops grows from 1 to 10
+    create_error_point(router, interface, "yesterday", input_drops=1, framing_errors=2)
+    create_error_point(router, interface, "today", input_drops=10, framing_errors=2)
+
+    first_interface = get_interface_errors()["interfaces"][0]
+    assert first_interface["diff"] == {"input-drops": 9}
+
+
+def test_skips_interface_when_no_errors_have_changed(
+    create_error_point, get_interface_errors
+):
+    """Identical counters for yesterday and today are expected to yield no entry."""
+    for when in ("yesterday", "today"):
+        create_error_point(
+            "mx1.ams.nl.geant.net", "ae1", when, input_drops=1, framing_errors=2
+        )
+
+    reported = get_interface_errors()["interfaces"]
+    assert not reported
+
+
+def test_processes_excluded_interface(create_error_point, get_interface_errors):
+    """An interface matching an exclusion is expected under ``excluded_interfaces``.
+
+    The regular interface is expected with only its non-zero counters, while the
+    excluded interface is expected with all counters, zeros included.
+    """
+    create_error_point(
+        "mx1.ams.nl.geant.net", "ae1", "today", input_drops=1, framing_errors=2
+    )
+    # the description of this interface contains "foo", which is excluded below
+    create_error_point(
+        "mx1.fra.de.geant.net", "ae10", "today", input_drops=3, framing_errors=4
+    )
+
+    reported = {
+        "router": "mx1.ams.nl.geant.net",
+        "interface": "ae1",
+        "description": "PHY blah blah",
+        "error_counters": {"input-drops": 1, "framing-errors": 2},
+    }
+    excluded = {
+        "router": "mx1.fra.de.geant.net",
+        "interface": "ae10",
+        "description": "PHY blah blah foo",
+        "error_counters": {
+            "input-drops": 3,
+            "framing-errors": 4,
+            "bit-error-seconds": 0,
+            "errored-blocks-seconds": 0,
+            "input-crc-errors": 0,
+            "input-total-errors": 0,
+            "input-discards": 0,
+            "output-drops": 0,
+        },
+    }
+
+    errors = get_interface_errors(exclusions=["foo"])
+    assert errors["interfaces"] == [reported]
+    assert errors["excluded_interfaces"] == [excluded]
+
+
+def test_render_html(create_error_point, get_interface_errors):
+    """Render the report for two interfaces and compare it with the stored html.
+
+    The stored file is read with ``\\n`` line endings; these are converted to
+    ``\\r\\n`` before the comparison.
+    """
+    router, interface = "mx1.ams.nl.geant.net", "ae1"
+    create_error_point(router, interface, "yesterday", input_drops=1, framing_errors=2)
+    create_error_point(router, interface, "today", input_drops=2, framing_errors=4)
+    create_error_point("mx1.fra.de.geant.net", "ae10", "today", input_drops=3)
+
+    html = render_html(errors=get_interface_errors(), date="<some date>")
+    # The expected value contains mixed tabs and spaces. We put it in a separate file
+    # to comply with flake8
+    expected_file = DATA_DIR / "test_render_html-expected.html"
+    assert html == expected_file.read_text().replace("\n", "\r\n")
+
+
+def test_render_html_with_exclusions(create_error_point, get_interface_errors):
+    """Render a report with an excluded interface and compare with the stored html.
+
+    The stored file is read with ``\\n`` line endings; these are converted to
+    ``\\r\\n`` before the comparison.
+    """
+    create_error_point("mx1.ams.nl.geant.net", "ae1", "today", input_drops=2)
+    create_error_point(
+        "mx1.fra.de.geant.net",
+        "ae10",
+        "today",
+        # mess up order of kwargs to test re-ordering
+        bit_error_seconds=2,
+        framing_errors=1,
+        input_crc_errors=4,
+        errored_blocks_seconds=3,
+        input_discards=6,
+        input_total_errors=5,
+        output_drops=8,
+        input_drops=7,
+    )
+
+    html = render_html(
+        errors=get_interface_errors(exclusions=["foo"]), date="<some date>"
+    )
+    # The expected value contains mixed tabs and spaces. We put it in a separate file
+    # to comply with flake8
+    expected_file = DATA_DIR / "test_render_html_with_exclusions-expected.html"
+    assert html == expected_file.read_text().replace("\n", "\r\n")
+
+
+def test_render_email():
+    """The rendered email is expected to hold the headers and the encoded body.
+
+    Besides the ``From`` and ``To`` headers, the md5 hex digest and the base64
+    encoding of the html body are looked up in the rendered text.
+    """
+    body = "<SOME_BODY>"
+    email_config = {"from": "someone@geant.org", "to": ["someone.else@geant.org"]}
+    result = render_email(email_config, html=body, subject="<subject>")
+
+    expected_fragments = (
+        "From: someone@geant.org",
+        "To: someone.else@geant.org",
+        hashlib.md5(body.encode()).hexdigest(),
+        base64.b64encode(body.encode()).decode(),
+    )
+    for fragment in expected_fragments:
+        assert fragment in result
+
+
+def test_render_email_for_multiple_recipients():
+    """Several ``to``/``cc`` addresses are expected to be joined with ``"; "``."""
+    email_config = {
+        "from": "someone@geant.org",
+        "to": ["someone.else@geant.org", "to2@geant.org"],
+        "cc": ["cc1@geant.org", "cc2@geant.org"],
+    }
+    result = render_email(email_config, html="<SOME_BODY>", subject="<subject>")
+
+    for header in (
+        "To: someone.else@geant.org; to2@geant.org",
+        "Cc: cc1@geant.org; cc2@geant.org",
+    ):
+        assert header in result
+
+
+@patch.object(smtplib, "SMTP")
+def test_send_email(SMTP):
+    """Check the SMTP connection arguments and the arguments given to ``sendmail``.
+
+    ``smtplib.SMTP`` is replaced by a mock, so no mail is actually sent.
+    """
+    email_config = {
+        "from": "someone@geant.org",
+        "to": ["someone.else@geant.org"],
+        "hostname": "smtp.some.host",
+    }
+    send_email("<payload>", email_config)
+
+    expected_connect = call(
+        host=email_config["hostname"], port=25, timeout=SMTP_TIMEOUT_SECONDS
+    )
+    assert SMTP.call_args == expected_connect
+
+    # the object that ``with SMTP(...) as ...`` yields
+    connection = SMTP.return_value.__enter__.return_value
+    assert connection.sendmail.call_args == call(
+        email_config["from"], ["someone.else@geant.org"], "<payload>"
+    )
+
+
+@patch.object(smtplib, "SMTP")
+def test_send_email_to_multiple_recipients(SMTP):
+    """``sendmail`` is expected to receive the ``to`` addresses followed by ``cc``."""
+    email_config = {
+        "from": "someone@geant.org",
+        "to": ["someone.else@geant.org", "to2@geant.org"],
+        "cc": ["cc1@geant.org", "cc2@geant.org"],
+        "hostname": "smtp.some.host",
+    }
+    send_email("<payload>", email_config)
+
+    all_recipients = [
+        "someone.else@geant.org",
+        "to2@geant.org",
+        "cc1@geant.org",
+        "cc2@geant.org",
+    ]
+    # the object that ``with SMTP(...) as ...`` yields
+    connection = SMTP.return_value.__enter__.return_value
+    assert connection.sendmail.call_args == call(
+        email_config["from"], all_recipients, "<payload>"
+    )
+
+
+@patch.object(smtplib, "SMTP")
+def test_send_email_without_authentication(SMTP):
+    """Without ``starttls``/``password`` in the config neither is expected to be used."""
+    email_config = {
+        "from": "someone@geant.org",
+        "to": ["someone.else@geant.org"],
+        "hostname": "smtp.some.host",
+    }
+    send_email("<payload>", email_config)
+
+    # the object that ``with SMTP(...) as ...`` yields
+    connection = SMTP.return_value.__enter__.return_value
+    assert not connection.starttls.called
+    assert not connection.login.called
+
+
+@patch.object(smtplib, "SMTP")
+def test_send_email_with_password(SMTP):
+    """With a password configured, ``login`` is expected to get ``from`` and password."""
+    email_config = {
+        "from": "someone@geant.org",
+        "to": ["someone.else@geant.org", "to2@geant.org"],
+        "cc": ["cc1@geant.org", "cc2@geant.org"],
+        "hostname": "smtp.some.host",
+        "password": "some-password",
+    }
+    send_email("<payload>", email_config)
+
+    # the object that ``with SMTP(...) as ...`` yields
+    connection = SMTP.return_value.__enter__.return_value
+    expected_login = call(email_config["from"], email_config["password"])
+    assert connection.login.call_args == expected_login
+
+
+@patch.object(smtplib, "SMTP")
+def test_send_email_with_starttls(SMTP):
+    """With ``starttls`` set to True, ``starttls`` is expected to be called."""
+    email_config = {
+        "from": "someone@geant.org",
+        "to": ["someone.else@geant.org", "to2@geant.org"],
+        "cc": ["cc1@geant.org", "cc2@geant.org"],
+        "hostname": "smtp.some.host",
+        "starttls": True,
+    }
+    send_email("<payload>", email_config)
+
+    # the object that ``with SMTP(...) as ...`` yields
+    connection = SMTP.return_value.__enter__.return_value
+    assert connection.starttls.called
+
+
+@pytest.fixture
+def config_file(tmp_path):
+    """Write a test config to ``tmp_path / "config.json"`` and return its path."""
+    email = {
+        "from": "noreply@geant.org",
+        "reply_to": "noreply@geant.org",
+        "to": "some-bogus-email",
+        "hostname": "some.smtp.server",
+    }
+    influx = {
+        "hostname": "hostname",
+        "database": "dbname",
+        "measurement": "errors",
+        "username": "some-username",
+        "password": "user-password",
+    }
+    content = {
+        "email": email,
+        "inventory": ["blah"],
+        "influx": influx,
+        "exclude-interfaces": ["FOO"],
+    }
+    target = tmp_path / "config.json"
+    target.write_text(json.dumps(content))
+    return target
+
+
+@patch.object(cli, "setup_logging")
+@patch.object(smtplib, "SMTP")
+def test_e2e(
+    SMTP,
+    unused_setup_logging,
+    mock_influx_client,
+    small_inventory,
+    config_file,
+    create_error_point,
+):
+    """Run the cli with patched inventory, influx and smtp and inspect the email.
+
+    ``cli.setup_logging`` and ``smtplib.SMTP`` are replaced by mocks for the whole
+    test; ``cli.load_interfaces`` and ``cli.influx_client`` are patched to return
+    the ``small_inventory`` and ``mock_influx_client`` fixtures while the cli runs.
+    """
+    create_error_point("mx1.ams.nl.geant.net", "ae1", "today", input_drops=1)
+
+    with patch.object(
+        cli, "load_interfaces", return_value=small_inventory
+    ), patch.object(cli, "influx_client", return_value=mock_influx_client):
+        runner = CliRunner()
+        result = runner.invoke(cli.cli, ["--config", str(config_file)])
+        assert result.exit_code == 0, str(result)
+
+        # positional arguments of the last ``sendmail`` call: sender, recipients,
+        # payload
+        sendmail_call_args = SMTP().__enter__().sendmail.call_args[0]
+        assert sendmail_call_args[0] == "noreply@geant.org"
+        assert sendmail_call_args[1] == ["some-bogus-email"]
+        payload = sendmail_call_args[2]
+
+        assert "The latest errors report is attached." in payload
+
+        # the text after the last blank line of the payload is decoded as the
+        # base64 encoded report
+        report_b64 = payload.split("\n\n")[-1]
+        report = base64.b64decode(report_b64).decode()
+
+        assert "mx1.ams.nl.geant.net" in report
+        assert "ae1" in report
+        assert "input-drops\t1" in report