diff --git a/brian_polling_manager/error_report/cli.py b/brian_polling_manager/error_report/cli.py index 2e59e4197324f0399b15912fb9c1d7a6d8938449..3f1d52a236c7ae8d2f068a6bfaea4fb156e1dcf2 100644 --- a/brian_polling_manager/error_report/cli.py +++ b/brian_polling_manager/error_report/cli.py @@ -12,14 +12,16 @@ points. For every interface, the latest error counts are compared against yester error count to determine whether it has suffered new errors. Currently the following errors are checked: - * ``framing-errors`` - * ``bit-error-seconds`` - * ``errored-blocks-seconds`` - * ``input-crc-errors`` - * ``input-total-errors`` - * ``input-discards`` - * ``input-drops`` - * ``output-drops`` + * ``bit-error-seconds`` (Juniper) + * ``errored-blocks-seconds`` (Juniper) + * ``framing-errors`` (Juniper) + * ``input-crc-errors`` (Juniper) + * ``input-total-errors`` (Juniper, Nokia) + * ``input-discards``(Juniper, Nokia) + * ``input-drops`` (Juniper) + * ``output-discards`` (Nokia) + * ``output-drops`` (Juniper) + * ``output-total-errors`` (Nokia) For every interface with new errors is added to a summary report. This report is then sent to the OC. @@ -60,14 +62,16 @@ logger = logging.getLogger(__name__) # The error field names in the influx query vs their reporting name ERROR_FIELDS = { - "last_input_framing_errors": "framing-errors", "last_bit_error_seconds": "bit-error-seconds", "last_errored_blocks_seconds": "errored-blocks-seconds", + "last_input_framing_errors": "framing-errors", "last_input_crc_errors": "input-crc-errors", - "last_input_total_errors": "input-total-errors", "last_input_discards": "input-discards", "last_input_drops": "input-drops", + "last_input_total_errors": "input-total-errors", + "last_output_discards": "output-discards", "last_output_drops": "output-drops", + "last_output_total_errors": "output-total-errors", } INFLUX_TIME_WINDOW_TODAY = "time > now() - 1d" @@ -79,14 +83,16 @@ PROCESSED_ERROR_COUNTERS_SCHEMA = { "error_counters_content": { "type": "object", "properties": { - "framing-errors": {"type": "integer"}, "bit-error-seconds": {"type": "integer"}, "errored-blocks-seconds": {"type": "integer"}, + "framing-errors": {"type": "integer"}, "input-crc-errors": {"type": "integer"}, - "input-total-errors": {"type": "integer"}, "input-discards": {"type": "integer"}, "input-drops": {"type": "integer"}, + "input-total-errors": {"type": "integer"}, + "output-discards": {"type": "integer"}, "output-drops": {"type": "integer"}, + "output-total-errors": {"type": "integer"}, }, "additionalProperties": False, }, @@ -96,14 +102,17 @@ PROCESSED_ERROR_COUNTERS_SCHEMA = { "router": {"type": "string"}, "interface": {"type": "string"}, "description": {"type": "string"}, - "error_counters": {"$ref": "#/definitions/error_counters_content"}, + "errors_today": {"$ref": "#/definitions/error_counters_content"}, + "errors_yesterday": {"$ref": "#/definitions/error_counters_content"}, "diff": {"$ref": "#/definitions/error_counters_content"}, }, "required": [ "router", "interface", "description", - "error_counters", + "errors_today", + "errors_yesterday", + "diff", ], "additionalProperties": False, }, @@ -113,13 +122,11 @@ PROCESSED_ERROR_COUNTERS_SCHEMA = { "router": {"type": "string"}, "interface": {"type": "string"}, "description": {"type": "string"}, - "error_counters": {"$ref": "#/definitions/error_counters_content"}, }, "required": [ "router", "interface", "description", - "error_counters", ], "additionalProperties": False, }, @@ -140,12 +147,31 @@ PROCESSED_ERROR_COUNTERS_SCHEMA = { } -def setup_logging(): +class MessageCountingLogHandler(logging.NullHandler): + def __init__(self, level=logging.NOTSET) -> None: + super().__init__(level) + self.count = 0 + + def handle(self, _) -> None: + self.count += 1 + + +def setup_logging(debug=False) -> MessageCountingLogHandler: + """ + :param debug: Boolean. Set log level to DEBUG if True, or INFO otherwise + :returns: a MessageCounter object that tracks error log messages + """ + + level = logging.DEBUG if debug else logging.INFO + counter = MessageCountingLogHandler(level=logging.ERROR) + stream_handler = logging.StreamHandler(sys.stderr) + stream_handler.setLevel(level) logging.basicConfig( - stream=sys.stderr, - level="INFO", format="%(asctime)s - %(levelname)s - %(message)s", + level=level, + handlers=[counter, stream_handler], ) + return counter def get_error_points(client: InfluxDBClient, time_window: str): @@ -191,7 +217,6 @@ def interface_errors( interface_info, errors, exclusions=(), - raise_on_errors=False, ): """ Retrieves error counters from influx @@ -211,65 +236,52 @@ def interface_errors( key: select_error_fields(val, mapping=errors) for key, val in error_points_today.items() } - yesterdays_data = { - key: select_error_fields(val, mapping=errors) - for key, val in error_points_yesterday.items() - } result = {"interfaces": [], "excluded_interfaces": []} new_errors = {} for (router, ifc), info in interface_info.items(): + error_info = { + "router": router, + "interface": ifc, + "description": info["description"], + } + + if is_excluded_interface(info, exclusions): + logger.info(f"Found excluded interface {router} - {ifc}") + result["excluded_interfaces"].append(error_info) + continue + try: today = todays_data[(router, ifc)] except KeyError: logger.error(f"{router} - {ifc} not found in influx data") - if raise_on_errors: - raise continue - if not any(err > 0 for err in today.values()): + today_nonzero = {err: val for err, val in today.items() if val > 0} + if not today_nonzero: # skip interfaces without any errors continue - yesterday = yesterdays_data.get((router, ifc), {}) + yesterday = select_error_fields( + error_points_yesterday.get((router, ifc), {}), mapping=errors + ) - counters = { - "router": router, - "interface": ifc, - "error_counters": today, - "description": info["description"], + diff = { + err: (val - yesterday[err]) + for err, val in today_nonzero.items() + if (val - yesterday[err]) > 0 } - - if is_excluded_interface(info, exclusions): - logger.info(f"Found excluded interface {router} - {ifc}") - result["excluded_interfaces"].append(counters) + if not diff: + # Skip interface if it does not have any increased error counters continue - nonzero_errors = {err: val for err, val in today.items() if val > 0} - counters["error_counters"] = nonzero_errors - new_errors[(router, ifc)] = sum(nonzero_errors.values()) + error_info["errors_today"] = today_nonzero + error_info["diff"] = diff + error_info["errors_yesterday"] = yesterday - if any(yesterday.values()): - # we have existing errors - - # This is strictly not the most correct way to determine differences. - # during the day the error count may have reset and diffs may actually - # be negative, but we ignore those because that is (mostly) how it was - # done in the orginal bash script - diff = { - err: (val - yesterday[err]) - for err, val in nonzero_errors.items() - if (val - yesterday[err]) > 0 - } - if not diff: - # Skip interface if it does not have any increased error counters - continue - - counters["diff"] = diff - new_errors[(router, ifc)] = sum(diff.values()) - - result["interfaces"].append(counters) + new_errors[(router, ifc)] = sum(diff.values()) + result["interfaces"].append(error_info) result["interfaces"].sort( key=lambda i: ( @@ -377,9 +389,13 @@ def main(config: dict, send_mail: bool): "or printing the report to stdout (--no-email). Default: --email", ) def cli(config, send_mail): - setup_logging() + error_counter = setup_logging() config = load(config_file=config) main(config, send_mail=send_mail) + if error_counter.count > 0: + raise click.ClickException( + "Errors were encountered while generating error report" + ) if __name__ == "__main__": diff --git a/brian_polling_manager/error_report/error_report.html.jinja2 b/brian_polling_manager/error_report/error_report.html.jinja2 index 8314caecf84aa17562164ca25cbffa47061220d0..c9d0d722e42e97412de86c4fac948be79aa4a52b 100644 --- a/brian_polling_manager/error_report/error_report.html.jinja2 +++ b/brian_polling_manager/error_report/error_report.html.jinja2 @@ -5,76 +5,85 @@ <meta name="viewport" content="width=device-width, initial-scale=1" /> <title>GÉANT Interface error report</title> <style> - html { - font-family: monospace; - font-size: 12pt; - } + html { + font-family: monospace; + font-size: 12pt; + } - section, footer { - padding: 20pt 0 0 20pt; - } + section, footer { + margin: 20pt 0 0 20pt; + } - .interface-block { - padding-bottom: 20pt; - } + .interface-block { + margin-bottom: 20pt; + } - .interface-errors { - padding-left: 20pt; - } + .interface-errors { + margin-left: 20pt; + } - ul { - list-style-type: none; - } + .interface-list { + list-style-type: none; + } - h3, p, ul { - margin: 0pt; - } + h3, p, ul { + margin: 0pt; + } - .error-table { - padding-left: 20pt; - } + .error-table { + margin-left: 10pt; + } - td { - padding-right: 10pt; - } + th { + text-align: right; + font-weight: normal; + } - td.diff { - padding-left: 20pt; - } + td { + width: 80pt; + text-align: right; + margin-left: 20pt; + } + .first-column { + min-width: 150pt; + } + + .yesterday { + color: #757575; + } + + .diff { + font-weight: bold; + } </style> </head> <body> - <section> - <div class="interface-report"> - {%- for ifc in interfaces %} - <div class="interface-block"> - <h3>{{ ifc.router }}</h3> - <div class="interface-errors"> - <p><strong>{{ ifc.interface }}</strong> {{ ifc.description }}</p> - <table class="error-table"> - {%- if ifc.diff %} + <section class="interface-report"> + {%- for ifc in interfaces %} + <div class="interface-block"> + <h3>{{ ifc.router }}<small> - {{ ifc.interface }}</small></h3> + <div class="interface-errors"> + <p>{{ ifc.description }}</p> + <table class="error-table"> + <tr> + <th class="first-column"></th> + <th class="yesterday">Yesterday</th> + <th>Today</th> + <th class="diff">Diff</th> + </tr> {%- for err, diff in ifc.diff.items() %} - <tr> - <td>{{ err }}</td> - <td>{{ ifc.error_counters[err] }}</td> - <td class="diff">Diff:</td> - <td>{{ diff }}</td> - </tr> - {%- endfor %} - {%- else %} - {%- for err, val in ifc.error_counters.items() %} - <tr> - <td>{{ err }}</td> - <td>{{ val }}</td> - </tr> + <tr> + <td class="first-column">{{ err }}</td> + <td class="yesterday">{{ ifc.errors_yesterday[err] }}</td> + <td>{{ ifc.errors_today[err] }}</td> + <td class="diff">{{ diff }}</td> + </tr> {%- endfor %} - {%- endif %} - </table> - </div> + </table> </div> - {%- endfor %} </div> + {%- endfor %} </section> {%- if excluded_interfaces %} <section class="excluded"> diff --git a/test/error_report/data/small-inventory.json b/test/error_report/data/small-inventory.json index ee2350c007595774c4be6a1ea2177afd787e7a98..d0fede63bb3130be3a4acce5f5e8472d3ca489b5 100644 --- a/test/error_report/data/small-inventory.json +++ b/test/error_report/data/small-inventory.json @@ -46,5 +46,14 @@ } ], "snmp-index": 9999 + }, + { + "router": "rt0.ams.nl.geant.net", + "name": "lag-1", + "bundle": [], + "bundle-parents": [], + "description": "PHY nokia", + "circuits": [], + "snmp-index": 9999 } ] diff --git a/test/error_report/data/test_render_html-expected.html b/test/error_report/data/test_render_html-expected.html deleted file mode 100644 index 3a0a88538edef7819b70efcdac582dfd1458fabd..0000000000000000000000000000000000000000 --- a/test/error_report/data/test_render_html-expected.html +++ /dev/null @@ -1,86 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8" /> - <meta name="viewport" content="width=device-width, initial-scale=1" /> - <title>GÉANT Interface error report</title> - <style> - html { - font-family: monospace; - font-size: 12pt; - } - - section, footer { - padding: 20pt 0 0 20pt; - } - - .interface-block { - padding-bottom: 20pt; - } - - .interface-errors { - padding-left: 20pt; - } - - ul { - list-style-type: none; - } - - h3, p, ul { - margin: 0pt; - } - - .error-table { - padding-left: 20pt; - } - - td { - padding-right: 10pt; - } - - td.diff { - padding-left: 20pt; - } - - </style> - </head> - <body> - <section> - <div class="interface-report"> - <div class="interface-block"> - <h3>mx1.ams.nl.geant.net</h3> - <div class="interface-errors"> - <p><strong>ae1</strong> PHY blah blah</p> - <table class="error-table"> - <tr> - <td>framing-errors</td> - <td>4</td> - <td class="diff">Diff:</td> - <td>2</td> - </tr> - <tr> - <td>input-drops</td> - <td>2</td> - <td class="diff">Diff:</td> - <td>1</td> - </tr> - </table> - </div> - </div> - <div class="interface-block"> - <h3>mx1.fra.de.geant.net</h3> - <div class="interface-errors"> - <p><strong>ae10</strong> PHY blah blah foo</p> - <table class="error-table"> - <tr> - <td>input-drops</td> - <td>3</td> - </tr> - </table> - </div> - </div> - </div> - </section> - <footer>Generated SOME_DATE</footer> - </body> -</html> \ No newline at end of file diff --git a/test/error_report/data/test_render_html_with_exclusions-expected.html b/test/error_report/data/test_render_html_with_exclusions-expected.html deleted file mode 100644 index 2369a6fc18b5575f8cf77c68631943946a5c2d72..0000000000000000000000000000000000000000 --- a/test/error_report/data/test_render_html_with_exclusions-expected.html +++ /dev/null @@ -1,72 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> - <head> - <meta charset="utf-8" /> - <meta name="viewport" content="width=device-width, initial-scale=1" /> - <title>GÉANT Interface error report</title> - <style> - html { - font-family: monospace; - font-size: 12pt; - } - - section, footer { - padding: 20pt 0 0 20pt; - } - - .interface-block { - padding-bottom: 20pt; - } - - .interface-errors { - padding-left: 20pt; - } - - ul { - list-style-type: none; - } - - h3, p, ul { - margin: 0pt; - } - - .error-table { - padding-left: 20pt; - } - - td { - padding-right: 10pt; - } - - td.diff { - padding-left: 20pt; - } - - </style> - </head> - <body> - <section> - <div class="interface-report"> - <div class="interface-block"> - <h3>mx1.ams.nl.geant.net</h3> - <div class="interface-errors"> - <p><strong>ae1</strong> PHY blah blah</p> - <table class="error-table"> - <tr> - <td>input-drops</td> - <td>2</td> - </tr> - </table> - </div> - </div> - </div> - </section> - <section class="excluded"> - <p>EXCLUDED INTERFACES</p> - <ul class="interface-list"> - <li>mx1.fra.de.geant.net - ae10 - PHY blah blah foo</li> - </ul> - </section> - <footer>Generated SOME_DATE</footer> - </body> -</html> \ No newline at end of file diff --git a/test/error_report/test_error_report.py b/test/error_report/test_error_report.py index 4865948f1bfb47730207e3c9356e998a5d683573..21532c68d9055aecb7f3599dfed1e37d317e8623 100644 --- a/test/error_report/test_error_report.py +++ b/test/error_report/test_error_report.py @@ -18,6 +18,7 @@ from brian_polling_manager.error_report.cli import ( INFLUX_TIME_WINDOW_TODAY, INFLUX_TIME_WINDOW_YESTERDAY, PROCESSED_ERROR_COUNTERS_SCHEMA, + MessageCountingLogHandler, _filter_and_sort_interfaces, get_error_points, get_relevant_interfaces, @@ -247,6 +248,9 @@ def test_select_error_fields_skips_other_fields(): } +DEFAULT_ERRORS_YESTERDAY = select_error_fields({}, ERROR_FIELDS) + + def test_interface_errors_with_new_errors(create_error_point, get_interface_errors): create_error_point( "mx1.ams.nl.geant.net", @@ -268,7 +272,7 @@ def test_interface_errors_with_new_errors(create_error_point, get_interface_erro "router": "mx1.ams.nl.geant.net", "interface": "ae1", "description": "PHY blah blah", - "error_counters": { + "errors_today": { "framing-errors": 1, "bit-error-seconds": 2, "errored-blocks-seconds": 3, @@ -278,6 +282,17 @@ def test_interface_errors_with_new_errors(create_error_point, get_interface_erro "input-drops": 7, "output-drops": 8, }, + "diff": { + "framing-errors": 1, + "bit-error-seconds": 2, + "errored-blocks-seconds": 3, + "input-crc-errors": 4, + "input-total-errors": 5, + "input-discards": 6, + "input-drops": 7, + "output-drops": 8, + }, + "errors_yesterday": DEFAULT_ERRORS_YESTERDAY, } ], "excluded_interfaces": [], @@ -295,17 +310,21 @@ def test_interface_errors_with_multiple_interfaces( "router": "mx1.ams.nl.geant.net", "interface": "ae1", "description": "PHY blah blah", - "error_counters": { + "errors_today": { "framing-errors": 2, }, + "diff": {"framing-errors": 2}, + "errors_yesterday": DEFAULT_ERRORS_YESTERDAY, }, { "router": "mx1.fra.de.geant.net", "interface": "ae10", "description": "PHY blah blah foo", - "error_counters": { + "errors_today": { "framing-errors": 1, }, + "diff": {"framing-errors": 1}, + "errors_yesterday": DEFAULT_ERRORS_YESTERDAY, }, ] @@ -318,6 +337,15 @@ def test_logs_message_on_missing_error_counters_for_interface( assert "mx1.fra.de.geant.net - ae10 not found in influx data" in caplog.text +def test_does_not_log_message_for_missing_excluded_interface( + create_error_point, get_interface_errors, caplog +): + create_error_point("mx1.ams.nl.geant.net", "ae1", "today", framing_errors=0) + create_error_point("mx1.fra.de.geant.net", "ae10", "today", framing_errors=0) + get_interface_errors(exclusions=[("rt0.ams.nl.geant.net", "lag-1")]) + assert not [r for r in caplog.records if r.levelname == "ERROR"] + + def test_does_not_check_for_logical_interfaces(get_interface_errors, caplog): get_interface_errors() assert "mx1.fra.de.geant.net - ae99.1" not in caplog.text @@ -336,31 +364,21 @@ def test_interface_errors_doesnt_include_0_errors( "mx1.ams.nl.geant.net", "ae1", "today", input_drops=1, framing_errors=0 ) errors = get_interface_errors() - assert errors["interfaces"][0] == { - "router": "mx1.ams.nl.geant.net", - "interface": "ae1", - "description": "PHY blah blah", - "error_counters": { - "input-drops": 1, - }, - } + assert errors["interfaces"][0]["errors_today"] == {"input-drops": 1} -def test_increased_error_produces_diff(create_error_point, get_interface_errors): +def test_interface_errors_with_yesterday_errors( + create_error_point, get_interface_errors +): create_error_point("mx1.ams.nl.geant.net", "ae1", "yesterday", input_drops=1) create_error_point("mx1.ams.nl.geant.net", "ae1", "today", input_drops=10) + errors = get_interface_errors() - assert errors["interfaces"][0] == { - "router": "mx1.ams.nl.geant.net", - "interface": "ae1", - "description": "PHY blah blah", - "error_counters": { - "input-drops": 10, - }, - "diff": { - "input-drops": 9, - }, + assert errors["interfaces"][0]["diff"] == {"input-drops": 9} + assert errors["interfaces"][0]["errors_yesterday"] == { + **DEFAULT_ERRORS_YESTERDAY, + "input-drops": 1, } @@ -398,32 +416,18 @@ def test_processes_excluded_interface(create_error_point, get_interface_errors): ) create_error_point( "mx1.fra.de.geant.net", "ae10", "today", input_drops=3, framing_errors=4 - ) # this interface is excluded through its description + ) errors = get_interface_errors(exclusions=[("mx1.fra.de.geant.net", "ae10")]) - assert errors["interfaces"] == [ - { - "router": "mx1.ams.nl.geant.net", - "interface": "ae1", - "description": "PHY blah blah", - "error_counters": {"input-drops": 1, "framing-errors": 2}, - } - ] + + assert {(ifc["router"], ifc["interface"]) for ifc in errors["interfaces"]} == { + ("mx1.ams.nl.geant.net", "ae1") + } assert errors["excluded_interfaces"] == [ { "router": "mx1.fra.de.geant.net", "interface": "ae10", "description": "PHY blah blah foo", - "error_counters": { - "input-drops": 3, - "framing-errors": 4, - "bit-error-seconds": 0, - "errored-blocks-seconds": 0, - "input-crc-errors": 0, - "input-total-errors": 0, - "input-discards": 0, - "output-drops": 0, - }, } ] @@ -474,10 +478,38 @@ def test_render_html(create_error_point, get_interface_errors): create_error_point("mx1.fra.de.geant.net", "ae10", "today", input_drops=3) errors = get_interface_errors() result = render_html(errors=errors, date="SOME_DATE") - # The expected value contains mixed tabs and spaces. We put it in a separate file - # to comply with flake8 - expected = (DATA_DIR / "test_render_html-expected.html").read_text() - assert result == expected + + assert '<td class="first-column">input-drops</td>' in result + assert '<td class="first-column">framing-errors</td>' in result + + +def test_render_html_for_nokia(create_error_point, get_interface_errors): + create_error_point( + "rt0.ams.nl.geant.net", + "lag-1", + "yesterday", + input_total_errors=1, + input_discards=1, + output_total_errors=1, + output_discards=1, + ) + create_error_point( + "rt0.ams.nl.geant.net", + "lag-1", + "today", + input_total_errors=2, + input_discards=2, + output_total_errors=2, + output_discards=2, + ) + + errors = get_interface_errors() + result = render_html(errors=errors, date="SOME_DATE") + + assert '<td class="first-column">input-total-errors</td>' in result + assert '<td class="first-column">input-discards</td>' in result + assert '<td class="first-column">output-total-errors</td>' in result + assert '<td class="first-column">output-discards</td>' in result def test_render_html_with_exclusions(create_error_point, get_interface_errors): @@ -491,11 +523,9 @@ def test_render_html_with_exclusions(create_error_point, get_interface_errors): ) errors = get_interface_errors(exclusions=[("mx1.fra.de.geant.net", "ae10")]) result = render_html(errors=errors, date="SOME_DATE") - # The expected value contains mixed tabs and spaces. We put it in a separate file - # to comply with flake8 - expected = (DATA_DIR / "test_render_html_with_exclusions-expected.html").read_text() - assert result == expected + assert "<p>EXCLUDED INTERFACES</p>" in result + assert "mx1.fra.de.geant.net - ae10 - PHY blah blah foo" in result def test_render_email(): @@ -615,14 +645,17 @@ def config_file(tmp_path): "username": "some-username", "password": "user-password", }, - "exclude-interfaces": [("mx1.fra.de.geant.net", "ae10")], + "exclude-interfaces": [ + ("mx1.fra.de.geant.net", "ae10"), + ("rt0.ams.nl.geant.net", "lag-1"), + ], } path = tmp_path / "config.json" path.write_text(json.dumps(config)) return path -@patch.object(cli, "setup_logging") +@patch.object(cli, "setup_logging", return_value=MessageCountingLogHandler()) @patch.object(smtplib, "SMTP") def test_e2e( SMTP, @@ -633,7 +666,6 @@ def test_e2e( create_error_point, ): create_error_point("mx1.ams.nl.geant.net", "ae1", "today", input_drops=1) - with patch.object( cli, "load_interfaces", return_value=small_inventory ), patch.object(cli, "influx_client", return_value=mock_influx_client): @@ -653,4 +685,17 @@ def test_e2e( assert "mx1.ams.nl.geant.net" in report assert "ae1" in report - assert "<td>input-drops</td>" in report + assert '<td class="first-column">input-drops</td>' in report + + +@patch.object(cli, "setup_logging", return_value=MessageCountingLogHandler()) +@patch.object(cli, "main") +def test_e2e_with_errors( + unused_mainmain, + setup_logging, + config_file, +): + setup_logging().count += 1 + runner = CliRunner() + result = runner.invoke(cli.cli, ["--config", str(config_file)]) + assert result.exit_code == 1