diff --git a/brian_polling_manager/error_report/cli.py b/brian_polling_manager/error_report/cli.py index 4d6da0453986e64041f860ee6f4bdcc0b759b77d..f45becd1bda5829f41e1bd9a51cd5feaba87bb4a 100644 --- a/brian_polling_manager/error_report/cli.py +++ b/brian_polling_manager/error_report/cli.py @@ -1,3 +1,46 @@ +""" +Report Interface Errors +======================= + +CLI tool for generating an interface error report and sending it by email to the OC. + +Every day an email may be sent that summarizes interface errors for all (*) GEANT +routers and other network devices. The error report is sent as an html attachment to +that email. First all relevant routers and interfaces are requested from Inventory +Provider. Then InfluxDB is queried for the latest, and yesterday's, error measurement +points. For every interface, the latest error counts are compared against yesterday's +error count to determine whether it has suffered new errors. Currently the following +errors are checked: + + * ``framing-errors`` + * ``bit-error-seconds`` + * ``errored-blocks-seconds`` + * ``input-crc-errors`` + * ``input-total-errors`` + * ``input-discards`` + * ``input-drops`` + * ``output-drops`` + +Every interface with new errors is added to a summary report. This report is then +sent to the OC. + +.. code-block:: bash + + Usage: report-interface-errors [OPTIONS] + + Options: + --config PATH Path to a config file for this tool. The schema this config + file must adhere to can be found in + ``brian_polling_manager.error_report.config.ERROR_REPORT_CONFIG_SCHEMA`` + +[2024-04-09] This tool is the successor of a bash-script that was used before. That +script has some peculiarities in its output, and this new version mimics the +output of the earlier tool as much as possible. + +*) There are some rules for which routers/interfaces to include and exclude. See the +`get_relevant_interfaces`_ function for more details. 
+""" + from datetime import datetime import json import logging @@ -6,16 +49,17 @@ import pathlib from typing import Sequence from brian_polling_manager.interface_stats.services import influx_client from brian_polling_manager.inventory import load_interfaces +import click from influxdb import InfluxDBClient from brian_polling_manager.error_report.config import load -from brian_polling_manager.error_report.mailer import render_html +from brian_polling_manager.error_report.report import ( + render_email, + render_html, + send_email, +) logger = logging.getLogger(__name__) -CONFIG_FILE = "/home/pellek/develop/klanten/geant/error_report-config.json" - -INFLUX_TIME_WINDOW_TODAY = "time > now() - 1d" -INFLUX_TIME_WINDOW_YESTERDAY = "time < now() - 1d and time > now() - 2d" # The error field names in the influx query vs their reporting name ERROR_FIELDS = { @@ -29,6 +73,8 @@ ERROR_FIELDS = { "last_output_drops": "output-drops", } +INFLUX_TIME_WINDOW_TODAY = "time > now() - 1d" +INFLUX_TIME_WINDOW_YESTERDAY = "time < now() - 1d and time > now() - 2d" PROCESSED_ERROR_COUNTERS_SCHEMA = { "$schema": "https://json-schema.org/draft/2020-12/schema", @@ -106,7 +152,7 @@ LOGGING_DEFAULT_CONFIG = { "class": "logging.StreamHandler", "level": "INFO", "formatter": "simple", - "stream": "ext://sys.stderr", + "stream": "ext://sys.stdout", }, }, "loggers": { @@ -276,27 +322,53 @@ def _filter_and_convert_interfaces(interfaces): ) -def main(): - setup_logging() - config = load(config_file=pathlib.Path(CONFIG_FILE)) +def main(config: dict): + """Main function for the error reporting script + + :param config: An instance of `ERROR_REPORT_CONFIG_SCHEMA` + """ + logger.info(f"Retrieving interfaces from inventory provider: {config['inventory']}") + all_interfaces = get_relevant_interfaces(config["inventory"]) client = influx_client(config["influx"]) with client: + logger.info("Retrieving error points from influxdb...") all_error_counters = interface_errors( client, 
interface_info=all_interfaces, errors=ERROR_FIELDS, exclusions=config["exclude-interfaces"], ) - body = render_html( - all_error_counters, - date=datetime.utcnow().strftime("%a %d %b %H:%M:%S UTC %Y"), - ) - + logger.info("Generating report...") - # TODO: ensure data is from the day that we're interested in (today or yesterday) - # TODO: send script failures to admin email + body = render_html( + all_error_counters, + date=datetime.utcnow().strftime("%a %d %b %H:%M:%S UTC %Y"), + ) + email = render_email(config["email"], html=body) + logger.info("Sending email...") + send_email(email, config=config["email"]) + logger.info("Done!") + + +@click.command() +@click.option( + "-c", + "--config", + type=click.Path( + exists=True, + file_okay=True, + dir_okay=False, + readable=True, + path_type=pathlib.Path, + ), + help="path to a config file", +) +def cli(config): + setup_logging() + config = load(config_file=config) + main(config) if __name__ == "__main__": - main() + cli() diff --git a/brian_polling_manager/error_report/config-example.json b/brian_polling_manager/error_report/config-example.json index 336a28d2b33abfda5a0591715518882fba7cd7a0..06d6f303fe36cb6c07241e6bb4c4338bb4a0fce3 100644 --- a/brian_polling_manager/error_report/config-example.json +++ b/brian_polling_manager/error_report/config-example.json @@ -4,7 +4,9 @@ "reply_to": "noreply@geant.org", "to": "some-bogus-email", "cc": "some-cc", - "contact": "someone@geant.org / NE team" + "hostname": "some.smtp.server", + "username": "smtp-user", + "password": "smtp-password" }, "inventory": ["blah"], "influx": { diff --git a/brian_polling_manager/error_report/config.py b/brian_polling_manager/error_report/config.py index e39177a7fecf2dd2b30fb2c6188b0e0a73aefae7..2614a4e191af3191867ad90466b1fb3717de69b6 100644 --- a/brian_polling_manager/error_report/config.py +++ b/brian_polling_manager/error_report/config.py @@ -6,7 +6,7 @@ import jsonschema logger = logging.getLogger(__name__) -CONFIG_SCHEMA = { 
+ERROR_REPORT_CONFIG_SCHEMA = { "$schema": "https://json-schema.org/draft/2020-12/schema", "definitions": { "email-params": { @@ -14,12 +14,26 @@ CONFIG_SCHEMA = { "properties": { "from": {"type": "string"}, "reply_to": {"type": "string"}, - "to": {"type": "string"}, - "cc": {"type": "string"}, - "contact": {"type": "string"}, + "to": {"$ref": "#/definitions/string-or-array"}, + "cc": {"$ref": "#/definitions/string-or-array"}, + "hostname": {"type": "string"}, + "username": {"type": "string"}, + "password": {"type": "string"}, + "starttls": {"type": "boolean"}, }, + "required": [ + "from", + "to", + "hostname", + ], "additionalProperties": False, }, + "string-or-array": { + "oneOf": [ + {"type": "string"}, + {"type": "array", "items": {"type": "string"}, "minItems": 1}, + ] + }, "influx-db-measurement": { "type": "object", "properties": { @@ -71,6 +85,11 @@ def load(config_file: pathlib.Path): """ config = json.loads(config_file.read_text()) - jsonschema.validate(config, CONFIG_SCHEMA) + jsonschema.validate(config, ERROR_REPORT_CONFIG_SCHEMA) + # convert str addresses into list of str + if isinstance(config["email"]["to"], str): + config["email"]["to"] = [config["email"]["to"]] + if isinstance(config["email"].get("cc"), str): + config["email"]["cc"] = [config["email"]["cc"]] return config diff --git a/brian_polling_manager/error_report/email.jinja2 b/brian_polling_manager/error_report/email.jinja2 new file mode 100644 index 0000000000000000000000000000000000000000..90a06bfe30b8005a905b3a9b795cab7d2b436c94 --- /dev/null +++ b/brian_polling_manager/error_report/email.jinja2 @@ -0,0 +1,40 @@ +{#- +We have a lot of hardcoded stuff in here. Ideally we don't want to use jinja here +at all but use pythons EmailMessage or MIMEBase classes to construct the email. +However, there are a bunch of peculiarities in the email message that make it hard to +do this, unless we can simplify the email. 
But for now we stick to the original email +message format +-#} +From: {{ config.from }} +Sender: {{ config.from }} +{%- if config.reply_to is defined %} +Reply-To: {{ config.reply_to }} +{%- endif %} +To: {{ config.to | join('; ') }} +{%- if config.cc is defined %} +Cc: {{ config.cc | join('; ') }} +{%- endif %} +Mime-Version: 1.0 +Subject: {{ subject }} +X-Mailer: /home/neteam/code/juniper_errors_report/email.sh +Content-Type: multipart/mixed; boundary="-" + +--- +Content-Type: text/plain; format=flowed; charset=ISO-8859-1 +Content-Disposition: inline +Content-Transfer-Encoding: 8bit + +Hi, + +The latest errors report is attached. + +Regards, +neteam@neteam-server01.geant.org:/home/neteam/code/juniper_errors_report + +--- +Content-Type: text/plain; name="errors_report.html" +Content-Transfer-Encoding: base64 +Content-Disposition: inline; filename="errors_report.html" +Content-MD5: {{ md5_hash }} + +{{ b64_report | wordwrap(76) }} \ No newline at end of file diff --git a/brian_polling_manager/error_report/mailer.py b/brian_polling_manager/error_report/mailer.py deleted file mode 100644 index 32f797c633418bcd4a49cf3a4bf63bacf45388e8..0000000000000000000000000000000000000000 --- a/brian_polling_manager/error_report/mailer.py +++ /dev/null @@ -1,26 +0,0 @@ -import pathlib -import jinja2 - -THIS_DIR = pathlib.Path(__file__).parent - - -def send_email(errors, config): - pass - - -def render_html(errors, date): - """ - Render error struct to email body - - :param errors: an instance of `PROCESSED_ERROR_COUNTERS_SCHEMA` - :param template_file: a jinja2 templat to rende - """ - env = jinja2.Environment( - loader=jinja2.FileSystemLoader(THIS_DIR), newline_sequence='\r\n' - ) - template = env.get_template("error_report.html.jinja2") - return template.render(**errors, date=date) - - -if __name__ == "__main__": - pass diff --git a/brian_polling_manager/error_report/report.py b/brian_polling_manager/error_report/report.py new file mode 100644 index 
0000000000000000000000000000000000000000..c2968486d97042b84c05f25ced7bd26e4c60d2fb --- /dev/null +++ b/brian_polling_manager/error_report/report.py @@ -0,0 +1,57 @@ +import logging +import pathlib +import smtplib +import jinja2 +import hashlib +import base64 + +THIS_DIR = pathlib.Path(__file__).parent +logger = logging.getLogger(__name__) + +SMTP_TIMEOUT = 5 # s + + +def render_html(errors, date): + """ + Render error struct to email body + + :param errors: an instance of `PROCESSED_ERROR_COUNTERS_SCHEMA` + :param template_file: a jinja2 template to render + """ + env = jinja2.Environment( + loader=jinja2.FileSystemLoader(THIS_DIR), newline_sequence="\r\n" + ) + template = env.get_template("error_report.html.jinja2") + return template.render(**errors, date=date) + + +def render_email( + email_config: dict, html: str, subject="GEANT Juniper Interface Errors Report" +): + env = jinja2.Environment(loader=jinja2.FileSystemLoader(THIS_DIR)) + template = env.get_template("email.jinja2") + html_as_bytes = html.encode() + md5_hash = hashlib.md5(html_as_bytes).hexdigest() + b64_report = base64.b64encode(html_as_bytes).decode() + return template.render( + config=email_config, subject=subject, md5_hash=md5_hash, b64_report=b64_report + ) + + +def send_email(payload: str, config: dict): + with smtplib.SMTP(host=config["hostname"], port=25, timeout=SMTP_TIMEOUT) as server: + if config.get("starttls"): + server.starttls() + + if config.get("password"): + username = config.get("username", config["from"]) + try: + server.login(username, config["password"]) + except smtplib.SMTPNotSupportedError: + logger.warning( + "Authentication not supported, continuing without authentication. 
" + "Unset 'email.password' in the config to suppress this message" + ) + + recipients = [*config["to"], *config.get("cc", [])] + server.sendmail(config["from"], recipients, payload) diff --git a/test/error_report/test_error_report.py b/test/error_report/test_error_report.py index 68196ac4d902e0696824a1e3caa3d4ed92566b27..721f19af7ceb8f9213bc3e7d338d6d5b12313d70 100644 --- a/test/error_report/test_error_report.py +++ b/test/error_report/test_error_report.py @@ -1,9 +1,17 @@ +import base64 +import hashlib import json import pathlib +import smtplib from unittest.mock import Mock, patch, call - -from brian_polling_manager.error_report.mailer import render_html +import brian_polling_manager.error_report.report +from brian_polling_manager.error_report.report import ( + SMTP_TIMEOUT, + render_email, + render_html, + send_email, +) import jsonschema import pytest from brian_polling_manager.error_report import cli, config @@ -20,6 +28,8 @@ from brian_polling_manager.error_report.cli import ( select_error_fields, ) +from click.testing import CliRunner + DATA_DIR = pathlib.Path(__file__).parent / "data" @@ -39,11 +49,6 @@ def small_inventory(): return json.loads(((DATA_DIR / "small-inventory.json").read_text())) -@pytest.fixture(params=["full_inventory", "small_inventory"]) -def inventory(request): - return request.getfixturevalue(request.param) - - @pytest.fixture def mock_influx_client(): class FakeInfluxClient: @@ -56,6 +61,9 @@ def mock_influx_client(): def __enter__(self): pass + def __exit__(self, *args, **kwargs): + pass + def add_point(self, hostname, interface, timestamp, payload): converted_payload = { self.INFLUX_ERROR_FIELDS[k]: v for k, v in payload.items() @@ -102,12 +110,6 @@ def create_error_point(mock_influx_client): return _create -@pytest.fixture -def mocked_load_interfaces(inventory): - with patch.object(cli, "load_interfaces", return_value=inventory) as mock: - yield mock - - @pytest.fixture def get_interface_errors(small_inventory, mock_influx_client): 
interfaces = _filter_and_convert_interfaces(small_inventory) @@ -133,7 +135,10 @@ def test_validate_config(tmp_path): "from": "noreply@geant.org", "reply_to": "noreply@geant.org", "to": "some-bogus-email", - "contact": "someone@geant.org / NE team", + "cc": ["recipient01@geant.org", "recipient02@geant.org"], + "hostname": "some.smtp.server", + "username": "smtp-user", + "password": "smtp-password", }, "inventory": ["blah"], "influx": { @@ -146,7 +151,27 @@ def test_validate_config(tmp_path): "exclude-interfaces": ["SOME DESCRIPTION PART"], } config_file.write_text(json.dumps(content)) - assert config.load(config_file) == content + result = config.load(config_file) + assert result == { + "email": { + "from": "noreply@geant.org", + "reply_to": "noreply@geant.org", + "to": ["some-bogus-email"], + "cc": ["recipient01@geant.org", "recipient02@geant.org"], + "hostname": "some.smtp.server", + "username": "smtp-user", + "password": "smtp-password", + }, + "inventory": ["blah"], + "influx": { + "hostname": "hostname", + "database": "dbname", + "measurement": "errors", + "username": "some-username", + "password": "user-password", + }, + "exclude-interfaces": ["SOME DESCRIPTION PART"], + } def test_get_relevant_interfaces(full_inventory): @@ -469,3 +494,165 @@ Generated <some date> </body> </html>""" assert result == expected.replace("\n", "\r\n") + + +def test_render_email(): + body = "<SOME_BODY>" + md5 = hashlib.md5(body.encode()).hexdigest() + b64 = base64.b64encode(body.encode()).decode() + config = {"from": "someone@geant.org", "to": ["someone.else@geant.org"]} + result = render_email(config, html=body, subject="<subject>") + + assert "From: someone@geant.org" in result + assert "To: someone.else@geant.org" in result + assert md5 in result + assert b64 in result + + +def test_render_email_for_multiple_recipients(): + body = "<SOME_BODY>" + config = { + "from": "someone@geant.org", + "to": ["someone.else@geant.org", "to2@geant.org"], + "cc": ["cc1@geant.org", 
"cc2@geant.org"], + } + result = render_email(config, html=body, subject="<subject>") + + assert "To: someone.else@geant.org; to2@geant.org" in result + assert "Cc: cc1@geant.org; cc2@geant.org" in result + + +@patch.object(smtplib, "SMTP") +def test_send_email(SMTP): + config = { + "from": "someone@geant.org", + "to": ["someone.else@geant.org"], + "hostname": "smtp.some.host", + } + send_email("<payload>", config) + assert SMTP.call_args == call( + host=config["hostname"], port=25, timeout=SMTP_TIMEOUT + ) + assert SMTP().__enter__().sendmail.call_args == call( + config["from"], ["someone.else@geant.org"], "<payload>" + ) + + +@patch.object(smtplib, "SMTP") +def test_send_email_to_multiple_recipients(SMTP): + config = { + "from": "someone@geant.org", + "to": ["someone.else@geant.org", "to2@geant.org"], + "cc": ["cc1@geant.org", "cc2@geant.org"], + "hostname": "smtp.some.host", + } + send_email("<payload>", config) + + assert SMTP().__enter__().sendmail.call_args == call( + config["from"], + ["someone.else@geant.org", "to2@geant.org", "cc1@geant.org", "cc2@geant.org"], + "<payload>", + ) + + +@patch.object(smtplib, "SMTP") +def test_send_email_without_authentication(SMTP): + config = { + "from": "someone@geant.org", + "to": ["someone.else@geant.org"], + "hostname": "smtp.some.host", + } + send_email("<payload>", config) + + assert not SMTP().__enter__().starttls.called + assert not SMTP().__enter__().login.called + + +@patch.object(smtplib, "SMTP") +def test_send_email_with_password(SMTP): + config = { + "from": "someone@geant.org", + "to": ["someone.else@geant.org", "to2@geant.org"], + "cc": ["cc1@geant.org", "cc2@geant.org"], + "hostname": "smtp.some.host", + "password": "some-password", + } + send_email("<payload>", config) + + assert SMTP().__enter__().login.call_args == call( + config["from"], config["password"] + ) + + +@patch.object(smtplib, "SMTP") +def test_send_email_with_starttls(SMTP): + config = { + "from": "someone@geant.org", + "to": 
["someone.else@geant.org", "to2@geant.org"], + "cc": ["cc1@geant.org", "cc2@geant.org"], + "hostname": "smtp.some.host", + "starttls": True, + } + send_email("<payload>", config) + + assert SMTP().__enter__().starttls.called + + +@pytest.fixture +def config_file(tmp_path): + config = { + "email": { + "from": "noreply@geant.org", + "reply_to": "noreply@geant.org", + "to": "some-bogus-email", + "hostname": "some.smtp.server", + }, + "inventory": ["blah"], + "influx": { + "hostname": "hostname", + "database": "dbname", + "measurement": "errors", + "username": "some-username", + "password": "user-password", + }, + "exclude-interfaces": ["FOO"], + } + path = tmp_path / "config.json" + path.write_text(json.dumps(config)) + return path + + +@patch.object(cli, "setup_logging") +@patch.object(smtplib, "SMTP") +def test_e2e( + SMTP, + unused_setup_logging, + mock_influx_client, + small_inventory, + config_file, + create_error_point, +): + create_error_point( + "mx1.ams.nl.geant.net", "ae1", "today", input_drops=1 + ) + + with patch.object( + cli, "load_interfaces", return_value=small_inventory + ), patch.object(cli, "influx_client", return_value=mock_influx_client): + runner = CliRunner() + result = runner.invoke(cli.cli, ["--config", str(config_file)]) + assert result.exit_code == 0, str(result) + + sendmail_call_args = SMTP().__enter__().sendmail.call_args[0] + assert sendmail_call_args[0] == "noreply@geant.org" + assert sendmail_call_args[1] == ["some-bogus-email"] + payload = sendmail_call_args[2] + + assert "The latest errors report is attached." in payload + + report_b64 = payload.split("\n\n")[-1] + report = base64.b64decode(report_b64).decode() + + assert "mx1.ams.nl.geant.net" in report + assert "ae1" in report + assert "input-drops\t1" in report