Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • develop
  • feature/POL1-813-error-report-sensu-check
  • master
  • 0.1
  • 0.10
  • 0.11
  • 0.12
  • 0.13
  • 0.14
  • 0.15
  • 0.16
  • 0.17
  • 0.18
  • 0.19
  • 0.2
  • 0.20
  • 0.3
  • 0.4
  • 0.5
  • 0.6
  • 0.7
  • 0.8
  • 0.9
23 results

Target

Select target project
  • geant-swd/brian/brian-polling-manager
1 result
Select Git revision
  • develop
  • feature/POL1-813-error-report-sensu-check
  • master
  • 0.1
  • 0.10
  • 0.11
  • 0.12
  • 0.13
  • 0.14
  • 0.15
  • 0.16
  • 0.17
  • 0.18
  • 0.19
  • 0.2
  • 0.20
  • 0.3
  • 0.4
  • 0.5
  • 0.6
  • 0.7
  • 0.8
  • 0.9
23 results
Show changes
Commits on Source (16)
Showing
with 6732 additions and 200 deletions
......@@ -7,4 +7,5 @@ coverage.xml
htmlcov
docs/build
*.log
dist/
\ No newline at end of file
dist/
bom.json
\ No newline at end of file
......@@ -2,6 +2,9 @@
All notable changes to this project will be documented in this file.
## [0.17] - 2025-05-20
- Add 120s timeout to get-interface-stats in case it gets stuck
## [0.16] - 2025-02-14
- Pin ncclient version due to bad release
- POL1-884: Add GWS Indirect polling for Nokia
......
MIT License
Copyright (c) 2023 GÉANT Vereniging
Copyright (c) 2025 GÉANT Vereniging
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......
......@@ -210,7 +210,12 @@ class State(object):
@last.setter
def last(self, new_last: Union[float, None]):
if not new_last or new_last < 0:
os.unlink(self.cache_filenames['state'])
try:
os.unlink(self.cache_filenames['state'])
except OSError:
logger.exception(
f'unable to delete state file {self.cache_filenames["state"]}')
return
else:
State._save_json(
self.cache_filenames['state'],
......
import enum
import json
import logging.config
import logging
import sys
import threading
import traceback
from datetime import datetime
from logging import LogRecord
from typing import Any, Collection, Dict, Iterable, List
......@@ -15,9 +17,9 @@ from brian_polling_manager.inventory import (
load_inventory_json,
)
from .common import PointGroup, RouterProcessor
from .juniper import JuniperRouterProcessor
from .nokia import NokiaRouterProcessor
from brian_polling_manager.interface_stats.common import PointGroup, RouterProcessor
from brian_polling_manager.interface_stats.juniper import JuniperRouterProcessor
from brian_polling_manager.interface_stats.nokia import NokiaRouterProcessor
logger = logging.getLogger()
......@@ -177,7 +179,7 @@ def _log_interface_points_sorted(points: Collection[dict], point_group: PointGro
longest_ifc = max(len(i) for i in interfaces)
ifc_count = len(interfaces)
for n in range(ifc_count // N_COLUMNS + (ifc_count % N_COLUMNS > 0)):
ifc_slice = interfaces[n * N_COLUMNS : (n + 1) * N_COLUMNS]
ifc_slice = interfaces[n * N_COLUMNS: (n + 1) * N_COLUMNS]
logger.info(" ".join(i.ljust(longest_ifc) for i in ifc_slice))
......@@ -224,7 +226,7 @@ def main(
def validate_config(_unused_ctx, _unused_param, file):
# import here because this is the only place we use the config module, and we want
# to reuse the name `config` for other purposes elsewhere
from . import config
from brian_polling_manager.interface_stats import config
try:
return config.load(file)
......@@ -295,11 +297,24 @@ def cli(
error_counter = setup_logging(debug=verbose)
try:
main(
processor=processor,
interfaces=interfaces if interfaces else ALL_,
output=OutputMethod.from_string(output.lower()),
thread = threading.Thread(
target=main,
args=(processor, interfaces if interfaces else ALL_, OutputMethod.from_string(output.lower())),
)
thread.daemon = True
thread.start()
thread.join(timeout=120)
if thread.is_alive():
logger.error("Thread timed out")
frames = sys._current_frames()
for thread_id, frame in frames.items():
if thread_id == thread.ident:
logger.error("Thread stack trace:")
for line in traceback.format_stack(frame):
logger.error(line.strip())
logger.error("-- Thread stack trace end --")
raise click.exceptions.Exit(2)
except Exception:
logger.exception(
f"Error while processing {processor.name.capitalize()} router {router_fqdn}"
......
......@@ -172,7 +172,10 @@ def interface_counters(
)
for ifc in remaining:
logger.error(f"Interface {ifc} was not found on router")
logger.error(
f"Interface {ifc} does not have {str(point_group).upper()}"
" counters on router"
)
def get_netconf_interface_info(router_name, ssh_params):
......@@ -187,7 +190,7 @@ def get_netconf_interface_info(router_name, ssh_params):
def get_netconf_interface_info_from_source_dir(router_name: str, source_dir: str):
file = pathlib.Path(source_dir) / f"{router_name}-interface-info.xml"
file = pathlib.Path(source_dir) / f"{router_name}-juniper.xml"
if not file.is_file():
raise ValueError(f"file {file} is not a valid file")
return etree.fromstring(file.read_text())
......@@ -291,7 +291,7 @@ def get_netconf_interface_info_from_source_dir(router_name: str, source_dir: str
return etree.fromstring(file.read_text())
return read_doc_or_raise(f"{router_name}-state.xml")
return read_doc_or_raise(f"{router_name}-nokia.xml")
# This could be a dataclass at some point
......
sphinx-build -b html -t drawio docs/source docs/build
*.bkp
This diff is collapsed.
System Architecture Notes
=========================
Components Overview
----------------------
.. only:: drawio
.. drawio-image:: architecture.drawio
:page-name: components
Systems Overview
----------------------
.. only:: drawio
.. drawio-image:: architecture.drawio
:page-name: deployment
Monitoring Schematic
----------------------
.. only:: drawio
.. drawio-image:: architecture.drawio
:page-name: monitoring
......@@ -14,7 +14,9 @@
# import sys
# sys.path.insert(0, os.path.abspath('.'))
from datetime import datetime
from importlib import import_module
import importlib.metadata
from docutils.parsers.rst import Directive
from docutils import nodes
from sphinx import addnodes
......@@ -55,11 +57,10 @@ def setup(app):
# -- Project information -----------------------------------------------------
project = 'BRIAN Polling Manager'
copyright = '2021, swd@geant.org'
author = 'swd@geant.org'
copyright = f"{datetime.now().year}, GÉANT"
author = "swd@geant.org"
# The full version, including alpha/beta/rc tags
release = '0.0.1'
release = importlib.metadata.version('brian_polling_manager')
# -- General configuration ---------------------------------------------------
......@@ -73,6 +74,11 @@ extensions = [
'sphinx.ext.coverage'
]
# The ``tags`` object is injected by sphinx into conf.py
# (toggle this by running ``sphinx-build -t drawio``).
# ``tags.has()`` is the documented way to query a tag; the ``tags.tags``
# attribute is an implementation detail of the Tags class.
if tags.has("drawio"):  # noqa: F821
    extensions.append("sphinxcontrib.drawio")
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
......
......@@ -16,5 +16,6 @@ Sensu checks for polling the data required by BRIAN.
:maxdepth: 2
:caption: Contents:
architecture
main
api
......@@ -12,3 +12,4 @@ responses
PyYAML
sphinx
sphinx-rtd-theme
sphinxcontrib-drawio
......@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='brian-polling-manager',
version="0.16",
version="0.17",
author='GEANT',
author_email='swd@geant.org',
description='service for managing BRIAN polling checks',
......
import functools
import json
import pathlib
from unittest.mock import patch
from brian_polling_manager.interface_stats import juniper
from brian_polling_manager.interface_stats import cli, juniper
from brian_polling_manager.interface_stats import nokia
import pytest
DATA_DIR = pathlib.Path(__file__).parent / "data"
JUNIPER_DATA_FILENAME_EXTENSION = "-interface-info.xml"
NOKIA_DATA_FILENAME_EXTENSION = "-state.xml"
JUNIPER_DATA_FILENAME_EXTENSION = "-juniper.xml"
NOKIA_DATA_FILENAME_EXTENSION = "-nokia.xml"
JUNIPER_ROUTERS = [
path.name[: -len(JUNIPER_DATA_FILENAME_EXTENSION)]
......@@ -75,81 +76,67 @@ def mocked_get_netconf(data_dir):
yield
# @functools.lru_cache()
def _filter_interfaces(interfaces, router_key: str):
    """Keep only the interfaces whose router is a known test router.

    :param interfaces: iterable of interface dicts
    :param router_key: key under which each dict stores its router name
    :return: list of the dicts whose ``router_key`` value is listed in
        ``JUNIPER_ROUTERS`` or ``NOKIA_ROUTERS``, in their original order
    :raises KeyError: if a dict has no ``router_key`` entry
    """
    # Build the lookup set once instead of once per interface.
    known_routers = set(JUNIPER_ROUTERS) | set(NOKIA_ROUTERS)
    return [ifc for ifc in interfaces if ifc[router_key] in known_routers]
@functools.lru_cache()
def poller_interfaces():
file = DATA_DIR / "poller-interfaces.json"
return json.loads(file.read_text())
# def error_interfaces():
# def filter_interface(interface: dict):
# description = interface["description"].lower()
# name = interface["name"].lower()
# return (
# "phy" in description
# and "spare" not in description
# and "non-operational" not in description
# and "reserved" not in description
# and "test" not in description
# and "dsc." not in name
# and "fxp" not in name
# and not re.match(r".*\.\d+$", name)
# )
# return list(filter(filter_interface, poller_interfaces()))
result = json.loads(file.read_text())
return _filter_interfaces(result, "router")
@functools.lru_cache()
def error_interfaces():
    """Load ``error-interfaces.json`` from ``DATA_DIR``, filtered by router.

    The result is cached, so the file is read at most once per process.
    """
    raw = (DATA_DIR / "error-interfaces.json").read_text()
    return _filter_interfaces(json.loads(raw), "router")
@functools.lru_cache()
def gws_indirect():
    """Load ``gws-indirect.json`` from ``DATA_DIR``, filtered by hostname.

    The result is cached, so the file is read at most once per process.
    """
    raw = (DATA_DIR / "gws-indirect.json").read_text()
    return _filter_interfaces(json.loads(raw), "hostname")
@pytest.fixture(scope="session")
def juniper_inventory():
def _excluded(ifc):
return ifc["name"].startswith("dsc")
polled = {}
result = {router: set() for router in JUNIPER_ROUTERS}
for ifc in poller_interfaces():
if _excluded(ifc):
continue
polled.setdefault(ifc["router"], set()).add(ifc["name"])
return polled
if ifc["router"] in result:
result[ifc["router"]].add(ifc["name"])
return result
@pytest.fixture(scope="session")
def nokia_inventory():
file = DATA_DIR / "nokia-interfaces.json"
data = json.loads(file.read_text())
return {router: set(interfaces) for router, interfaces in data.items()}
# @pytest.fixture
# def mock_inventory(juniper_inventory, nokia_inventory):
# def _all_interfaces():
# for router, interfaces in juniper_inventory.items():
# yield from ({"router": router, "name": ifc, "vendor": "juniper"} for ifc in interfaces)
# for router, interfaces in nokia_inventory.items():
# yield from ({"router": router, "name": ifc,"vendor": "nokia"} for ifc in interfaces)
# all_interfaces= [
# _all_interfaces()
# ]
# error_interface_names = {}
# def load_inventory_json(url, *args, **kwargs):
# if "gws/indirect" in url:
# return [
# [
# {
# "hostname": "rt0.ams.nl.lab.office.geant.net",
# "interface": "lag-11.333",
# "vendor": "nokia",
# "ip_filter": "NREN_IAS_DFN_OUT",
# },
# ]
# ]
# if "error" in url:
# error_interfaces
# return all_interfaces
# with patch.object(
# cli, "load_inventory_json", side_effect=load_inventory_json
# ) as mock:
# yield mock
result = {router: set() for router in NOKIA_ROUTERS}
for ifc in poller_interfaces():
if ifc["router"] in result:
result[ifc["router"]].add(ifc["name"])
return result
@pytest.fixture
def mock_inventory():
    """Patch ``cli.load_inventory_json`` so it serves the local test data.

    The replacement picks the data set from fragments of the requested url
    and falls back to the poller interfaces. The mock is yielded so tests can
    inspect how it was called.
    """
    # Checked in order; the first fragment found in the url wins.
    routes = (
        ("gws/indirect", gws_indirect),
        ("error", error_interfaces),
    )

    def _canned_inventory(url, *args, **kwargs):
        for fragment, loader in routes:
            if fragment in url:
                return loader()
        return poller_interfaces()

    patcher = patch.object(
        cli, "load_inventory_json", side_effect=_canned_inventory
    )
    with patcher as mock:
        yield mock
@pytest.fixture
......
import logging
import os
import pathlib
from brian_polling_manager.interface_stats import nokia
from lxml import etree
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
ncclient_logger = logging.getLogger("ncclient")
ncclient_logger.level = logging.WARNING
ROUTERS = [
"rt0.ams.nl.geant.net",
"rt0.lon2.uk.geant.net",
"rt0.ath.gr.lab.office.geant.net",
"rt0.ams.nl.lab.office.geant.net",
]
SSH_PARAMS = {
"ssh_config": "~/.ssh/config",
"hostkey_verify": False,
"username": "inprov",
"password": os.environ["NOKIA_PASSWORD"],
}
def load_estate_interface_info():
    """Fetch the netconf interface info of every router in ``ROUTERS``.

    Each document is serialized and written next to this script as
    ``<fqdn>-state.xml``.
    """
    target_dir = pathlib.Path(__file__).parent
    for fqdn in ROUTERS:
        logger.info(fqdn)
        doc = nokia.get_netconf_interface_info(
            router_name=fqdn, ssh_params=SSH_PARAMS
        )
        (target_dir / f"{fqdn}-state.xml").write_bytes(etree.tostring(doc))
if __name__ == "__main__":
load_estate_interface_info()
import concurrent.futures
import json
import os
import pathlib
import traceback
from brian_polling_manager.interface_stats.juniper import (
get_netconf_interface_info,
)
from brian_polling_manager.interface_stats import juniper, nokia
from lxml import etree
import requests
ROUTER_INPROV_URL = "https://uat-inprov01.geant.org/classifier/router-info"
BRIAN_INPROV_URL = "https://test-inprov01.geant.org/poller/interfaces"
ERROR_INPROV_URL = "https://test-inprov01.geant.org/poller/error-report-interfaces"
GWS_INDIRECT_INPROV_URL = (
"https://test-inprov01.geant.org/poller/gws/indirect?vendor=1&ip_filter=1"
)
ROUTERS = [
# snapshot of https://prod-ne-sot01.geant.net/ne-sot/all_dashboard_devices.txt
"mx1.ams.nl.geant.net",
"mx1.ath2.gr.geant.net",
"mx1.buc.ro.geant.net",
"mx1.bud.hu.geant.net",
"mx1.dub.ie.geant.net",
"mx1.gen.ch.geant.net",
"mx1.lon.uk.geant.net",
"mx1.lon2.uk.geant.net",
"mx1.mad.es.geant.net",
"mx1.par.fr.geant.net",
"mx1.poz.pl.geant.net",
"mx1.vie.at.geant.net",
"mx2.ath.gr.geant.net",
"mx2.lis.pt.geant.net",
"mx2.zag.hr.geant.net",
"rt1.bil.es.geant.net",
"rt1.por.pt.geant.net",
"rt2.ams.nl.geant.net",
"rt1.kau.lt.geant.net",
"rt2.kau.lt.geant.net",
"rt1.rig.lv.geant.net",
"rt2.rig.lv.geant.net",
"rt1.tar.ee.geant.net",
"rt2.tar.ee.geant.net",
"qfx.fra.de.geant.net",
"qfx.par.fr.geant.net",
"qfx.lon2.uk.geant.net",
# srx's are apparently only reachable by v4,
# ... and there's something else wrong with my jump config - ???
# (current test data was captured by running on a host where
# jump was not necessary)
# 'srx1.am.office.geant.net',
# 'srx2.am.office.geant.net',
# 'srx1.ch.office.geant.net',
# 'srx2.ch.office.geant.net',
"rt1.kie.ua.geant.net",
"rt2.kie.ua.geant.net",
"rt1.chi.md.geant.net",
"rt2.chi.md.geant.net",
"rt1.bra.sk.geant.net",
"rt1.mil2.it.geant.net",
"rt1.mar.fr.geant.net",
"rt1.pra.cz.geant.net",
"rt1.fra.de.geant.net",
"rt1.bru.be.geant.net",
"rt2.bru.be.geant.net",
"rt1.ham.de.geant.net",
"rt1.sof.bg.geant.net",
"rt2.bra.sk.geant.net",
"rt1.lju.si.geant.net",
"rt1.buc.ro.geant.net",
"rt1.cor.ie.geant.net",
"rt2.cor.ie.geant.net",
"rt1.ams.nl.geant.net",
"rt1.the.gr.geant.net",
"rt2.the.gr.geant.net",
LAB_ROUTERS = [
("nokia", "rt0.ams.nl.lab.office.geant.net"),
("nokia", "rt0.ath.gr.lab.office.geant.net"),
]
SSH_PARAMS = {
"juniper": {"hostkey_verify": False, "ssh_config": "~/.ssh/config"},
"nokia": {
"ssh_config": "~/.ssh/config",
"hostkey_verify": False,
"username": "inprov",
"password": os.environ["NOKIA_PASSWORD"],
},
}
THIS_DIR = pathlib.Path(__file__).parent
def download_json(url, timeout=30):
    """Download ``url`` and return the decoded json body.

    :param url: the url to GET
    :param timeout: seconds to wait for the server before giving up. Without
        a timeout ``requests`` waits indefinitely, which would leave this
        script hanging on an unresponsive host.
    :return: the parsed json document
    :raises requests.HTTPError: if the response status is 4xx/5xx
    :raises requests.Timeout: if the server does not answer within ``timeout``
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
def get_routers():
    """Yield ``(vendor, hostname)`` for every router to download data from.

    Routers come from ``ROUTER_INPROV_URL``, skipping those rejected by
    ``valid_router``, followed by the entries of ``LAB_ROUTERS``.
    """
    for router in download_json(ROUTER_INPROV_URL):
        if valid_router(router):
            yield (router["vendor"], router["hostname"])
    yield from LAB_ROUTERS
def save_router_info(fqdn, ssh_params: dict):
    """Download the netconf interface info of ``fqdn`` and save it to disk.

    The document is written next to this script as
    ``<fqdn>-interface-info.xml``.

    :param fqdn: router hostname to connect to
    :param ssh_params: connection parameters passed through to
        ``get_netconf_interface_info``
    """
    print(fqdn)
    file = pathlib.Path(__file__).parent / f"{fqdn}-interface-info.xml"
    doc = get_netconf_interface_info(router_name=fqdn, ssh_params=ssh_params)
    # etree.tostring returns bytes by default, which write_text rejects with
    # a TypeError, so write the serialized document as bytes.
    file.write_bytes(etree.tostring(doc))
def valid_router(router):
    """Return ``False`` for routers whose hostname starts with ``srx``.

    :param router: dict with at least a ``"hostname"`` entry
    :return: ``True`` if the router should be included, ``False`` otherwise
    """
    hostname = router["hostname"]
    if hostname.startswith("srx"):
        return False
    return True
def load_estate_interface_info():
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
for r in ROUTERS:
def save_router_info(hostname, vendor):
    """Download the netconf document of one router and store it on disk.

    The document is written to ``THIS_DIR`` as ``<hostname>-<vendor>.xml``.

    :param hostname: router to connect to
    :param vendor: ``"juniper"`` or ``"nokia"``; selects both the ssh
        parameters and the module used to talk to the router
    :return: ``hostname``
    :raises KeyError: if ``vendor`` has no entry in ``SSH_PARAMS``
    """
    print(f"Started downloading netconf for {hostname}")
    destination = THIS_DIR / f"{hostname}-{vendor}.xml"
    credentials = SSH_PARAMS[vendor]

    # pick the vendor specific download function
    if vendor == "juniper":
        fetch = juniper.get_netconf_interface_info
    elif vendor == "nokia":
        fetch = nokia.get_netconf_interface_info

    doc = fetch(router_name=hostname, ssh_params=credentials)
    destination.write_bytes(etree.tostring(doc))
    return hostname
def load_netconf_state():
routers = get_routers()
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
futures = {
executor.submit(
save_router_info,
r,
ssh_params={"hostkey_verify": False, "ssh_config": "~/.ssh/config"},
)
executor.shutdown(wait=True)
hostname=hostname,
vendor=vendor,
): hostname
for (vendor, hostname) in routers
}
try:
for future in concurrent.futures.as_completed(futures):
fqdn = futures[future]
try:
future.result()
except Exception:
traceback.print_exc()
print(f"Failed downloading netconf for {fqdn}")
continue
print(f"Success downloading netconf for {fqdn}")
except KeyboardInterrupt:
executor.shutdown(wait=False)
def download_and_save_json(url: str, path: pathlib.Path):
    """Download the json document at ``url`` and write it to ``path``.

    The document is re-serialized with an indentation of 2 spaces.
    """
    payload = download_json(url)
    serialized = json.dumps(payload, indent=2)
    path.write_text(serialized)
def main():
    """Refresh all test data: the inventory json files, then the netconf
    documents of the routers."""
    snapshots = (
        (BRIAN_INPROV_URL, "poller-interfaces.json"),
        (ERROR_INPROV_URL, "error-interfaces.json"),
        (GWS_INDIRECT_INPROV_URL, "gws-indirect.json"),
    )
    for url, filename in snapshots:
        download_and_save_json(url, THIS_DIR / filename)
    load_netconf_state()
if __name__ == "__main__":
load_estate_interface_info()
main()
This diff is collapsed.
[
{
"id": 730043,
"name": "DFN-AP1-IAS",
"customer": "UNKNOWN",
"speed": 10737418240,
"pop": "AMSTERDAM",
"hostname": "rt0.ams.nl.lab.office.geant.net",
"interface": "lag-11.333",
"type": "GWS - INDIRECT",
"status": "non-monitored",
"vendor": "nokia",
"ip_filter": "NREN_IAS_DFN_OUT"
}
]
\ No newline at end of file