Skip to content
Snippets Groups Projects
Commit fbaf5612 authored by Pelle Koster's avatar Pelle Koster
Browse files

initial version of html report with tests

parent 0164cd2e
No related branches found
No related tags found
No related merge requests found
include brian_polling_manager/logging_default_config.json
include **/*.jinja2
\ No newline at end of file
from datetime import datetime
import json
import logging
import os
import pathlib
from typing import Sequence
from brian_polling_manager.interface_stats.services import influx_client
from brian_polling_manager.inventory import load_interfaces
from influxdb import InfluxDBClient
from brian_polling_manager.error_report.config import load
from brian_polling_manager.error_report.mailer import render_html
logger = logging.getLogger(__name__)
SEND_FROM = "ne@geant.org"
REPLY_TO = "noreply@geant.org"
SEND_TO = "pelle.koster@geant.org"
CONFIG_FILE = "/home/pellek/develop/klanten/geant/error_report-config.json"
TIME_WINDOW_TODAY = "time > now() - 1d"
TIME_WINDOW_YESTERDAY = "time > now() - 2d and time < now() - 1d"
# The desired error fields vs their field name in the influx query
ERROR_FIELDS = [
("framing-errors", "last_input_framing_errors"),
("bit-error-seconds", "last_bit_error_seconds"),
("errored-blocks-seconds", "last_errored_blocks_seconds"),
("input-crc-errors", "last_input_crc_errors"),
("input-total-errors", "last_input_total_errors"),
("input-discards", "last_input_discards"),
("input-drops", "last_input_drops"),
("output-drops", "last_output_drops"),
]
INFLUX_TIME_WINDOW_TODAY = "time > now() - 1d"
INFLUX_TIME_WINDOW_YESTERDAY = "time < now() - 1d and time > now() - 2d"
# The error field names in the influx query vs their reporting name
ERROR_FIELDS = {
"last_input_framing_errors": "framing-errors",
"last_bit_error_seconds": "bit-error-seconds",
"last_errored_blocks_seconds": "errored-blocks-seconds",
"last_input_crc_errors": "input-crc-errors",
"last_input_total_errors": "input-total-errors",
"last_input_discards": "input-discards",
"last_input_drops": "input-drops",
"last_output_drops": "output-drops",
}
PROCESSED_ERROR_COUNTERS_SCHEMA = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
......@@ -72,60 +74,89 @@ PROCESSED_ERROR_COUNTERS_SCHEMA = {
},
"type": "object",
"properties": {
"counters": {
"interfaces": {
"type": "array",
"item": {"$ref": "#/definitions/interface_error_counters"},
},
"excluded_counters": {
"excluded_interfaces": {
"type": "array",
"item": {"$ref": "#/definitions/excluded_interface_error_counters"},
},
},
"required": ["counters", "excluded_counters"],
"required": ["interfaces", "excluded_interfaces"],
"additionalProperties": False,
}
email_config = {
"counters": [
{
"router": "rt1.some.router.geant.net",
"interface": "ef-0/1/1",
"description": "SOME DESCRIPTION",
"error_counters": {"output-drops": 1},
"diff": {"output-drops": 1},
"has_new_errors": True,
LOGGING_DEFAULT_CONFIG = {
"version": 1,
"disable_existing_loggers": False,
"formatters": {"simple": {"format": "%(asctime)s - %(levelname)s - %(message)s"}},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": "INFO",
"formatter": "simple",
"stream": "ext://sys.stderr",
},
],
"excluded_counters": [
{
"router": ...,
"interface": ...,
"description": ...,
"error_counters": ...,
},
],
},
"loggers": {
"brian_polling_manager": {
"level": "INFO",
"handlers": ["console"],
"propagate": False,
}
},
"root": {"level": "INFO", "handlers": ["console"]},
}
def setup_logging():
    """
    Set up logging from a ``dictConfig``-style configuration.

    If ``LOGGING_CONFIG`` is defined in the environment, its value is used as
    the filename of a JSON file holding the configuration, otherwise
    ``LOGGING_DEFAULT_CONFIG`` is used.
    """
    # ``logging.config`` is a submodule that a plain ``import logging`` does not
    # load, so import it explicitly before calling ``dictConfig``.
    import logging.config

    filename = os.environ.get("LOGGING_CONFIG")
    if filename is None:
        logging_config = LOGGING_DEFAULT_CONFIG
    else:
        with open(filename) as f:
            logging_config = json.load(f)

    logging.config.dictConfig(logging_config)
def get_error_points(client: "InfluxDBClient", time_window: str):
    """Get the last value for every error field for every (router, interface)

    :param client: an `InfluxDBClient`_
    :param time_window: an influx time window such as `INFLUX_TIME_WINDOW_TODAY`_ or
        `INFLUX_TIME_WINDOW_YESTERDAY`_
    :returns: a dict {(router, interface): error_point} where error_point is a dict
        with all the error field values for that respective interface. The
        error_point is an empty dict when the series does not yield any point.
    """
    raw_data = client.query(
        # This query may actually return values from multiple different points if
        # some values are missing for the last point. But it's close enough.
        f"SELECT last(*) FROM errors WHERE {time_window} "
        "group by hostname, interface_name;"
    )
    return {
        # ``iter`` makes this work for any iterable of points, the ``{}`` default
        # guards against a series without points
        (tags["hostname"], tags["interface_name"]): next(iter(points), {})
        for (_, tags), points in raw_data.items()
    }
def select_error_fields(errors, mapping):
    """Pick and rename the error fields listed in ``mapping``

    :param errors: a dict of {source field: value}
    :param mapping: a dict of {source field: target field}
    :returns: a dict of {target field: value} with an entry for every target field.
        Missing and falsy (eg. ``None``) values are reported as 0
    """
    selected = {}
    for source_field, target_field in mapping.items():
        value = errors.get(source_field)
        # substitute 0 for absent fields and for None values
        selected[target_field] = value if value else 0
    return selected
def interface_errors(
client: InfluxDBClient, interface_info, errors, exclusions, raise_on_errors=False
client: InfluxDBClient, interface_info, errors, exclusions=(), raise_on_errors=False
):
"""
Retrieves error counters from influx
......@@ -133,14 +164,20 @@ def interface_errors(
:param client: InfluxDBClient for connecting to influx
:param interface_info: a dict of {(router, interface): info_dict} with interface
information coming from invprov (ie. the output from `get_relevant_interfaces`_)
:param errors: A list of (result_field, input_data_field) for every error to report
:param errors: A dict of (input_data_field: result_field) for every error to report
on (see `ERROR_FIELDS`_)
:param raise_on_errors: raise when certain exceptions occur (useful for testing)
:result: an instance of PROCESSED_ERROR_COUNTERS_SCHEMA
"""
todays_data = get_error_points(client, TIME_WINDOW_TODAY)
yesterdays_data = get_error_points(client, TIME_WINDOW_YESTERDAY)
todays_data = {
key: select_error_fields(val, mapping=errors)
for key, val in get_error_points(client, INFLUX_TIME_WINDOW_TODAY).items()
}
yesterdays_data = {
key: select_error_fields(val, mapping=errors)
for key, val in get_error_points(client, INFLUX_TIME_WINDOW_YESTERDAY).items()
}
result = {"interfaces": [], "excluded_interfaces": []}
for (router, ifc), info in interface_info.items():
......@@ -151,28 +188,45 @@ def interface_errors(
if raise_on_errors:
raise
continue
if not any(err > 0 for err in today.values()):
# skip interfaces without any errors
continue
yesterday = yesterdays_data.get((router, ifc), {})
counters = {
"error_counters": {err[0]: (today[err[1]] or 0) for (err) in errors},
"router": router,
"interface": ifc,
"error_counters": today,
"description": info["description"],
}
if not is_excluded_interface(info["description"], exclusions):
counters["diff"] = {
err[0]: (today[err[1]] or 0) - (yesterday.get(err[1], 0) or 0)
for err in errors
}
# This is strictly not the most correct way to determine whether we have
# new errors (During the day the error count may have reset, some diffs may
# actually be negative), but it is (mostly) how it was determined previously
counters["has_new_errors"] = bool(
sum(v for v in counters["diff"].values() if v > 0)
)
nonzero_errors = {err: val for err, val in today.items() if val > 0}
counters["error_counters"] = nonzero_errors
if any(yesterday.values()):
# we have existing errors
# This is strictly not the most correct way to determine differences.
# during the day the error count may have reset and diffs may actually
# be negative, but we ignore those because that is (mostly) how it was
# done in the original bash script
diff = {
err: (val - yesterday[err])
for err, val in nonzero_errors.items()
if (val - yesterday[err]) > 0
}
if not diff:
# Skip interface if it does not have any increased error counters
continue
counters["diff"] = diff
result["interfaces"].append(counters)
else:
logger.info(f"Found excluded interface {router} - {ifc}")
result["excluded_interfaces"].append(counters)
return result
......@@ -184,31 +238,40 @@ def is_excluded_interface(description: str, exclusions: Sequence[str]):
return any(excl.lower() in description.lower() for excl in exclusions)
def get_relevant_interfaces(hosts):
    """Get interface info from inventory provider. Some interfaces are considered
    irrelevant based on their description or name

    :param hosts: passed on unchanged to ``load_interfaces``
    :returns: a dict of {(router, interface name): interface info}, ordered by key
    """
    return _filter_and_convert_interfaces(load_interfaces(hosts))


def _is_relevant_interface(interface):
    """Return True if the interface should be considered for the error report"""
    description = interface["description"].upper()
    name = interface["name"].lower()
    return (
        "PHY" in description
        and not any(
            word in description
            for word in ("SPARE", "NON-OPERATIONAL", "RESERVED", "TEST")
        )
        and not any(part in name for part in ("dsc.", "fxp"))
    )


def _filter_and_convert_interfaces(interfaces):
    """Drop irrelevant interfaces and index the others by (router, interface name)

    :param interfaces: an iterable of interface info dicts, each with at least a
        ``router``, ``name`` and ``description`` key
    :returns: a dict of {(router, interface name): interface info}, ordered by key
    """
    # We may want to put this logic inside inventory provider and serve from a new
    # endpoint
    relevant = (
        ((i["router"], i["name"]), i) for i in interfaces if _is_relevant_interface(i)
    )
    # Sort on the key only. Sorting the bare (key, info) tuples would fall back to
    # comparing the info dicts whenever two entries share the same key, which raises
    # a TypeError.
    return dict(sorted(relevant, key=lambda item: item[0]))
def main():
setup_logging()
config = load(config_file=pathlib.Path(CONFIG_FILE))
client = influx_client(config["influx"])
all_interfaces = get_relevant_interfaces(config["inventory"])
client = influx_client(config["influx"])
with client:
all_error_counters = interface_errors(
client,
......@@ -216,6 +279,11 @@ def main():
errors=ERROR_FIELDS,
exclusions=config["exclude-interfaces"],
)
body = render_html(
all_error_counters,
date=datetime.utcnow().strftime("%a %d %b %H:%M:%S UTC %Y"),
)
print(body)
# TODO: ensure data is from the day that we're interested in (today or yesterday)
# TODO: send script failures to admin email
......
<html>
<body>
<pre>
{%- if interfaces %}
{%- for ifc in interfaces %}
=================================
{{ ifc.router }}
=================================
{{ ifc.interface }} {{ ifc.description }}
{%- for %}
output-drops 1746 Diff: 626
{%- endfor %}
</pre>
</body>
</html>
<html>
<body>
<pre>
{%- for ifc in interfaces %}
=================================
{{ ifc.router }}
=================================
{{ ifc.interface }} {{ ifc.description }}
{%- if ifc.diff %}
{%- for err, diff in ifc.diff.items() %}
{{ err }}{{ " " if err == "framing-errors" else "" }} {{ ifc.error_counters[err] }} Diff: {{ diff }}
{%- endfor %}
{%- else %}
{%- for err, val in ifc.error_counters.items() %}
{{ err }}{{ " " if err == "framing-errors" else "" }} {{ val }}
{%- endfor %}
{%- endif %}
{{ '' }}
{%- endfor %}
{%- if excluded_interfaces %}
ROUTER,INTERFACE,FRAMING ERRORS,BIT ERROR SECONDS,ERRORED BLOCKS SECONDS,CRC ERRORS,TOTAL ERRORS,INPUT DISCARDS,INPUT DROPS,OUTPUT DROPS
{%- for ifc in excluded_interfaces %}
{{ifc.router}},{{ifc.interface}},{{ ifc.error_counters.values() | join(',') }},{{ifc.description}}
{%- endfor %}
{%- endif %}
Generated {{ date }}
</pre>
</body>
</html>
import pathlib
import jinja2
THIS_DIR = pathlib.Path(__file__).parent
def send_email(errors, config):
    """Placeholder: not implemented yet, does nothing and returns ``None``"""
    return None
def render_body(errors, template_file):
    """Placeholder: not implemented yet, does nothing and returns ``None``"""
    return None
def render_html(errors, date):
    """
    Render error struct to the html email body

    :param errors: an instance of `PROCESSED_ERROR_COUNTERS_SCHEMA`. Its top level
        keys are passed to the template as variables
    :param date: passed to the template as the ``date`` variable
    :returns: the rendered ``error_report.html.jinja2`` template
    """
    # NOTE(review): autoescape is not enabled on this environment, so values are
    # inserted into the html as-is -- confirm that interface descriptions can
    # never contain markup.
    loader = jinja2.FileSystemLoader(THIS_DIR)
    environment = jinja2.Environment(loader=loader, newline_sequence="\r\n")
    return environment.get_template("error_report.html.jinja2").render(
        **errors, date=date
    )
if __name__ == "__main__":
pass
This diff is collapsed.
[
{
"router": "mx1.ams.nl.geant.net",
"name": "ae1",
"bundle": [],
"bundle-parents": [],
"description": "PHY blah blah",
"circuits": [
{
"id": 12112,
"name": "something",
"type": "SERVICE",
"status": "operational"
}
],
"snmp-index": 1211
},
{
"router": "mx1.fra.de.geant.net",
"name": "ae10",
"bundle": [],
"bundle-parents": [],
"description": "PHY blah blah foo",
"circuits": [
{
"id": 50028,
"name": "something",
"type": "SERVICE",
"status": "operational"
}
],
"snmp-index": 1006
},
{
"router": "mx1.fra.de.geant.net",
"name": "ae99.1",
"bundle": [],
"bundle-parents": [],
"description": "SRV blah blah bar",
"circuits": [
{
"id": 50028,
"name": "something",
"type": "SERVICE",
"status": "operational"
}
],
"snmp-index": 9999
}
]
import json
import pathlib
from unittest.mock import Mock, patch, call
import jsonschema
import pytest
from brian_polling_manager.error_report import cli, config
from brian_polling_manager.error_report.cli import (
ERROR_FIELDS,
INFLUX_TIME_WINDOW_TODAY,
INFLUX_TIME_WINDOW_YESTERDAY,
PROCESSED_ERROR_COUNTERS_SCHEMA,
_filter_and_convert_interfaces,
get_error_points,
get_relevant_interfaces,
interface_errors,
is_excluded_interface,
select_error_fields,
)
DATA_DIR = pathlib.Path(__file__).parent / "data"
@pytest.fixture(autouse=True)
def mock_setup_logging():
with patch.object(cli, "setup_logging") as mock:
yield mock
@pytest.fixture(scope="session")
def full_inventory():
return json.loads(((DATA_DIR / "full-inventory.json").read_text()))
@pytest.fixture(scope="session")
def small_inventory():
return json.loads(((DATA_DIR / "small-inventory.json").read_text()))
@pytest.fixture(params=["full_inventory", "small_inventory"])
def inventory(request):
return request.getfixturevalue(request.param)
@pytest.fixture
def mock_influx_client():
    """Fixture returning a fake influx client that serves points added through
    ``add_point``"""

    class FakeInfluxClient:
        # Maps the field names used in the tests (eg. ``framing_errors``) to the
        # field names in the influx query result (eg. ``last_input_framing_errors``)
        INFLUX_ERROR_FIELDS = {v.replace("-", "_"): k for k, v in ERROR_FIELDS.items()}

        def __init__(self) -> None:
            self.today = []
            self.yesterday = []

        def __enter__(self):
            # return the client itself so that ``with client as c`` works
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            # Required for use in a ``with`` statement, never suppresses exceptions
            return None

        def add_point(self, hostname, interface, timestamp, payload):
            """Register an error point for either ``today`` or ``yesterday``"""
            converted_payload = {
                self.INFLUX_ERROR_FIELDS[k]: v for k, v in payload.items()
            }
            point = (
                ("errors", {"hostname": hostname, "interface_name": interface}),
                iter([converted_payload]),
            )
            if timestamp == "today":
                self.today.append(point)
            elif timestamp == "yesterday":
                self.yesterday.append(point)
            else:
                raise ValueError(
                    "'timestamp' argument must be either 'today' or 'yesterday'"
                )

        def query(self, q):
            """Return yesterday's points if the query contains yesterday's time
            window, today's points otherwise"""
            result = Mock()
            if INFLUX_TIME_WINDOW_YESTERDAY in q:
                result.items.return_value = self.yesterday
            else:
                result.items.return_value = self.today
            return result

    return FakeInfluxClient()
@pytest.fixture
def create_error_point(mock_influx_client):
    """Fixture for creating fake influx error points.

    The fixture value is a function with the following parameters:

    :param hostname: hostname
    :param interface: interface name
    :param timestamp: either ``today`` or ``yesterday``
    :param fields: one or more error fields given as keyword arguments:
        ``framing_errors``, ``bit_error_seconds``, ``errored_blocks_seconds``,
        ``input_crc_errors``, ``input_total_errors``, ``input_discards``,
        ``input_drops``, ``output_drops``
    :returns: a dict of {(hostname, interface): fields}

    The created error points will be returned by the ``mock_influx_client`` fixture
    """

    def _register_point(hostname, interface, timestamp, **fields):
        mock_influx_client.add_point(hostname, interface, timestamp, payload=fields)
        key = (hostname, interface)
        return {key: fields}

    return _register_point
@pytest.fixture
def mocked_load_interfaces(inventory):
with patch.object(cli, "load_interfaces", return_value=inventory) as mock:
yield mock
@pytest.fixture
def get_interface_errors(small_inventory, mock_influx_client):
interfaces = _filter_and_convert_interfaces(small_inventory)
def _get(**kwargs):
defaults = {
"client": mock_influx_client,
"interface_info": interfaces,
"errors": ERROR_FIELDS,
}
defaults.update(kwargs)
result = interface_errors(**defaults)
jsonschema.validate(result, PROCESSED_ERROR_COUNTERS_SCHEMA)
return result
return _get
def test_validate_config(tmp_path):
config_file = tmp_path / "config.json"
content = {
"email": {
"from": "noreply@geant.org",
"reply-to": "noreply@geant.org",
"to": "some-bogus-email",
"contact": "someone@geant.org / NE team",
},
"inventory": ["blah"],
"influx": {
"hostname": "hostname",
"database": "dbname",
"measurement": "errors",
"username": "some-username",
"password": "user-password",
},
"exclude-interfaces": ["SOME DESCRIPTION PART"],
}
config_file.write_text(json.dumps(content))
assert config.load(config_file) == content
def test_get_relevant_interfaces(full_inventory):
with patch.object(cli, "load_interfaces", return_value=full_inventory) as mock:
result = get_relevant_interfaces("some-host")
assert mock.call_args == call("some-host")
assert 0 < len(result) < len(full_inventory)
assert all(
"PHY" in i["description"].upper()
and "SPARE" not in i["description"].upper()
and "NON-OPERATIONAL" not in i["description"].upper()
and "RESERVED" not in i["description"].upper()
and "TEST" not in i["description"].upper()
and "dsc." not in i["name"].lower()
and "fxp" not in i["name"].lower()
for i in result.values()
)
@pytest.mark.parametrize(
"description, exclusions, is_excluded",
[
("DESC", [], False),
("DESC", ["DESC"], True),
("DESC", ["desc"], True),
("DESC", ["DESCMORE"], False),
("DESC", ["MORE", "DESC"], True),
("", ["DESC"], False),
],
)
def test_excluded_interface(description, exclusions, is_excluded):
assert is_excluded_interface(description, exclusions) == is_excluded
def test_get_error_points(mock_influx_client, create_error_point):
create_error_point("some.rtr", "ifc.0", "today", framing_errors=1, input_drops=2)
create_error_point("some.rtr", "ifc.1", "today", framing_errors=3, output_drops=4)
create_error_point("some.rtr", "ifc.2", "yesterday", framing_errors=3)
assert get_error_points(mock_influx_client, INFLUX_TIME_WINDOW_TODAY) == {
("some.rtr", "ifc.0"): {"last_input_framing_errors": 1, "last_input_drops": 2},
("some.rtr", "ifc.1"): {"last_input_framing_errors": 3, "last_output_drops": 4},
}
def test_select_field_adds_missing_error_count():
assert select_error_fields({"a": 1}, {"a": "A", "b": "B"}) == {"A": 1, "B": 0}
def test_select_error_fields_substitutes_none():
assert select_error_fields({"a": 1, "b": None}, {"a": "A", "b": "B"}) == {
"A": 1,
"B": 0,
}
def test_select_error_fields_skips_other_fields():
assert select_error_fields({"a": 1, "b": 2}, {"a": "A"}) == {
"A": 1,
}
def test_interface_errors_with_new_errors(create_error_point, get_interface_errors):
create_error_point(
"mx1.ams.nl.geant.net",
"ae1",
"today",
framing_errors=1,
bit_error_seconds=2,
errored_blocks_seconds=3,
input_crc_errors=4,
input_total_errors=5,
input_discards=6,
input_drops=7,
output_drops=8,
)
errors = get_interface_errors()
assert errors == {
"interfaces": [
{
"router": "mx1.ams.nl.geant.net",
"interface": "ae1",
"description": "PHY blah blah",
"error_counters": {
"framing-errors": 1,
"bit-error-seconds": 2,
"errored-blocks-seconds": 3,
"input-crc-errors": 4,
"input-total-errors": 5,
"input-discards": 6,
"input-drops": 7,
"output-drops": 8,
},
}
],
"excluded_interfaces": [],
}
def test_interface_errors_with_multiple_interfaces(
create_error_point, get_interface_errors
):
create_error_point("mx1.ams.nl.geant.net", "ae1", "today", framing_errors=1)
create_error_point("mx1.fra.de.geant.net", "ae10", "today", framing_errors=2)
errors = get_interface_errors()
assert errors["interfaces"] == [
{
"router": "mx1.ams.nl.geant.net",
"interface": "ae1",
"description": "PHY blah blah",
"error_counters": {
"framing-errors": 1,
},
},
{
"router": "mx1.fra.de.geant.net",
"interface": "ae10",
"description": "PHY blah blah foo",
"error_counters": {
"framing-errors": 2,
},
},
]
def test_logs_message_on_missing_error_counters_for_interface(
create_error_point, get_interface_errors, caplog
):
create_error_point("mx1.ams.nl.geant.net", "ae1", "today", framing_errors=1)
get_interface_errors()
assert "mx1.fra.de.geant.net - ae10 not found in influx data" in caplog.text
def test_does_not_check_for_logical_interfaces(get_interface_errors, caplog):
get_interface_errors()
assert "mx1.fra.de.geant.net - ae99.1" not in caplog.text
def test_skips_interface_with_0_errors(create_error_point, get_interface_errors):
create_error_point("mx1.ams.nl.geant.net", "ae1", "today")
errors = get_interface_errors()
assert not errors["interfaces"]
def test_interface_errors_doesnt_include_0_errors(
create_error_point, get_interface_errors
):
create_error_point(
"mx1.ams.nl.geant.net", "ae1", "today", input_drops=1, framing_errors=0
)
errors = get_interface_errors()
assert errors["interfaces"][0] == {
"router": "mx1.ams.nl.geant.net",
"interface": "ae1",
"description": "PHY blah blah",
"error_counters": {
"input-drops": 1,
},
}
def test_increased_error_produces_diff(create_error_point, get_interface_errors):
create_error_point("mx1.ams.nl.geant.net", "ae1", "yesterday", input_drops=1)
create_error_point("mx1.ams.nl.geant.net", "ae1", "today", input_drops=10)
errors = get_interface_errors()
assert errors["interfaces"][0] == {
"router": "mx1.ams.nl.geant.net",
"interface": "ae1",
"description": "PHY blah blah",
"error_counters": {
"input-drops": 10,
},
"diff": {
"input-drops": 9,
},
}
def test_unchanged_errors_do_not_show_up_in_diff(
create_error_point, get_interface_errors
):
create_error_point(
"mx1.ams.nl.geant.net", "ae1", "yesterday", input_drops=1, framing_errors=2
)
create_error_point(
"mx1.ams.nl.geant.net", "ae1", "today", input_drops=10, framing_errors=2
)
errors = get_interface_errors()
assert errors["interfaces"][0]["diff"] == {"input-drops": 9}
def test_skips_interface_when_no_errors_have_changed(
create_error_point, get_interface_errors
):
create_error_point(
"mx1.ams.nl.geant.net", "ae1", "yesterday", input_drops=1, framing_errors=2
)
create_error_point(
"mx1.ams.nl.geant.net", "ae1", "today", input_drops=1, framing_errors=2
)
errors = get_interface_errors()
assert not errors["interfaces"]
def test_processes_excluded_interface(create_error_point, get_interface_errors):
create_error_point(
"mx1.ams.nl.geant.net", "ae1", "today", input_drops=1, framing_errors=2
)
create_error_point(
"mx1.fra.de.geant.net", "ae10", "today", input_drops=3, framing_errors=4
)
errors = get_interface_errors(exclusions=["foo"])
assert errors["interfaces"] == [
{
"router": "mx1.ams.nl.geant.net",
"interface": "ae1",
"description": "PHY blah blah",
"error_counters": {"input-drops": 1, "framing-errors": 2},
}
]
assert errors["excluded_interfaces"] == [
{
"router": "mx1.fra.de.geant.net",
"interface": "ae10",
"description": "PHY blah blah foo",
"error_counters": {
"input-drops": 3,
"framing-errors": 4,
"bit-error-seconds": 0,
"errored-blocks-seconds": 0,
"input-crc-errors": 0,
"input-total-errors": 0,
"input-discards": 0,
"output-drops": 0,
},
}
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment