import enum
import json
import logging
import sys
import threading
import traceback
from multiprocessing import Queue
from datetime import datetime
from logging import LogRecord
from typing import Any, Collection, Dict, Iterable, List, Optional

import click
import jsonschema
from brian_polling_manager.influx import influx_client
from brian_polling_manager.inventory import (
GWS_INDIRECT_SCHEMA,
INVENTORY_INTERFACES_SCHEMA,
load_inventory_json,
)
from brian_polling_manager.interface_stats.common import PointGroup, RouterProcessor
from brian_polling_manager.interface_stats.juniper import JuniperRouterProcessor
from brian_polling_manager.interface_stats.nokia import NokiaRouterProcessor


logger = logging.getLogger()

DEFAULT_INTERFACES_URL = "/poller/interfaces/"


def write_points_to_influx(
points: Iterable[dict],
influx_params: dict,
timeout=5,
batch_size=50,
):
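    """Write the given points to InfluxDB in batches via the configured client."""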
client = influx_client({"timeout": timeout, **influx_params})
with client:
client.write_points(points, batch_size=batch_size)


def write_points_to_stdout(points, influx_params, stream=sys.stdout, **_):
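    """Write each point to ``stream``, prefixed with the measurement name."""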
for point in points:
stream.write(f"{influx_params['measurement']} - {json.dumps(point)}\n")
stream.flush()


class OutputMethod(enum.Enum):
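    """Available output methods, each pairing a CLI name with a writer callable.

    Illustrative use: ``OutputMethod.from_string("stdout")`` resolves the
    ``--output stdout`` choice, and its ``write_points`` then dispatches to
    ``write_points_to_stdout``.
    """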
INFLUX = ("influx", write_points_to_influx)
STDOUT = ("stdout", write_points_to_stdout)
NO_OUT = ("no-out", lambda *_, **__: None)

    def write_points(self, points: Iterable[dict], influx_params: dict, **kwargs):
        return self.value[1](points, influx_params=influx_params, **kwargs)

    @classmethod
    def from_string(cls, method: str):
        return {m.value[0]: m for m in cls}[method]

    def __str__(self):
        return self.value[0]


class MessageCounter(logging.NullHandler):
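    """Logging handler that counts records instead of emitting them."""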

    def __init__(self, level=logging.NOTSET) -> None:
        super().__init__(level)
        self.count = 0

    def handle(self, record: LogRecord) -> None:
        self.count += 1


def setup_logging(debug=False) -> MessageCounter:
    """
    Configure logging for the CLI.

    :param debug: if True, set the log level to DEBUG; otherwise INFO
    :returns: a MessageCounter object that counts error-level log records
    """
# demote ncclient logs
def changeLevel(record):
if record.levelno == logging.INFO:
record.levelno = logging.DEBUG
record.levelname = "DEBUG"
return record

    def drop(record):
        # a falsy return value from a logging filter discards the record
        pass
logging.getLogger("ncclient.operations.rpc").addFilter(changeLevel)
logging.getLogger("ncclient.transport.tls").addFilter(changeLevel)
logging.getLogger("ncclient.transport.ssh").addFilter(drop)
logging.getLogger("ncclient.transport.parser").addFilter(drop)
level = logging.DEBUG if debug else logging.INFO
counter = MessageCounter(level=logging.ERROR)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(level)
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(message)s",
level=level,
handlers=[counter, stream_handler],
)
return counter


def load_interfaces(
router_fqdn: str,
interfaces: Any,
point_group: PointGroup,
config: dict,
) -> Dict[str, dict]:
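    """Return a mapping of interface name to inventory data for the given router."""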
    # If an explicit interface list was given, use it as-is. Otherwise (--all) we
    # select the interfaces for this router from the inventory provider.
if interfaces is not ALL_:
return {ifc: {} for ifc in interfaces}
inprov_hosts = config["inventory"]
params = point_group.get_config(config)
return _get_interfaces_for_router(
router_fqdn,
inprov_hosts=inprov_hosts,
url=params.get("inventory-url", DEFAULT_INTERFACES_URL),
point_group=point_group,
)


def _get_interfaces_for_router(
router: str, inprov_hosts: List[str], url: str, point_group: PointGroup
) -> Dict[str, dict]:
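    """Query the inventory provider for this router's interfaces, keyed by name."""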
logger.info(
f"Fetching interfaces from inventory provider: {inprov_hosts} using url '{url}'"
)
if point_group == PointGroup.GWS_INDIRECT:
all_interfaces = {
ifc["interface"]: ifc
for ifc in load_inventory_json(url, inprov_hosts, GWS_INDIRECT_SCHEMA)
if ifc["hostname"] == router
}
else:
all_interfaces = {
ifc["name"]: ifc
for ifc in load_inventory_json(
url, inprov_hosts, INVENTORY_INTERFACES_SCHEMA
)
if ifc["router"] == router
}
return all_interfaces


def process_router(
processor: RouterProcessor,
point_group: PointGroup,
interfaces: Dict[str, dict],
timestamp: datetime,
output: OutputMethod,
):
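    """Collect points for one point group and write them via the chosen output."""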
influx_params = processor.group_config(point_group)["influx"]
points = list(
processor.points(
point_group=point_group, timestamp=timestamp, interfaces=interfaces
)
)
_log_interface_points_sorted(points, point_group=point_group)
output.write_points(points, influx_params=influx_params)


def _log_interface_points_sorted(points: Collection[dict], point_group: PointGroup):
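    """Log the interfaces that produced points, sorted and in aligned columns."""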
N_COLUMNS = 5
num_points = len(points)
    colon = ":" if num_points else ""
    logger.info(f"Found {point_group} points for {num_points} interfaces{colon}")
if not points:
return
interfaces = sorted(p["tags"]["interface_name"] for p in points)
longest_ifc = max(len(i) for i in interfaces)
ifc_count = len(interfaces)
for n in range(ifc_count // N_COLUMNS + (ifc_count % N_COLUMNS > 0)):
ifc_slice = interfaces[n * N_COLUMNS: (n + 1) * N_COLUMNS]
logger.info(" ".join(i.ljust(longest_ifc) for i in ifc_slice))


# Sentinel meaning "all interfaces", resolved via the inventory provider
ALL_ = object()


def main(
exception_queue: Optional[Queue],
processor: RouterProcessor,
interfaces=ALL_,
output: OutputMethod = OutputMethod.INFLUX,
):
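    """Process every supported point group for the processor's router.

    When ``exception_queue`` is given (i.e. when running in a worker thread), any
    exception is formatted and put on the queue instead of being raised.
    """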
try:
logger.info(
f"Processing {processor.name.capitalize()} router {processor.router_fqdn}"
)
timestamp = datetime.now()
for point_group in processor.supported_point_groups:
logger.info(f"Processing {str(point_group).upper()} points...")
inventory = processor.config.get("inventory")
check_interfaces = None
if inventory is not None:
check_interfaces = load_interfaces(
router_fqdn=processor.router_fqdn,
interfaces=interfaces,
point_group=point_group,
config=processor.config,
)
if not check_interfaces:
logger.info(f"No {str(point_group).upper()} interfaces found")
continue
process_router(
processor=processor,
point_group=point_group,
interfaces=check_interfaces,
timestamp=timestamp,
output=output,
)
    except Exception:
        if not exception_queue:
            # no queue means we are not running in a worker thread, so just raise
            raise
        exc_info = sys.exc_info()
        formatted = "".join(traceback.format_exception(*exc_info))
        exception_queue.put(formatted)


def validate_config(_unused_ctx, _unused_param, file):
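    """Click callback that parses and schema-validates the --config file."""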
    # import here because this is the only place we use the config module, and we
    # want to reuse the name `config` for other purposes elsewhere
from brian_polling_manager.interface_stats import config
try:
return config.load(file)
except json.JSONDecodeError:
raise click.BadParameter("config file is not valid json")
except jsonschema.ValidationError as e:
        raise click.BadParameter(str(e))


@click.command()
@click.option(
"--config",
"config",
required=True,
type=click.File("r"),
help="config filename",
callback=validate_config,
)
@click.option("--juniper", help="A Juniper router fqdn")
@click.option("--nokia", help="A Nokia router fqdn")
@click.option(
"-o",
"--output",
type=click.Choice(["influx", "stdout", "no-out"], case_sensitive=False),
default="influx",
help="Choose an output method. Default: influx",
)
@click.option(
"--all",
"all_",
is_flag=True,
default=False,
help=(
"Write points for all interfaces found in inventory provider for this router."
" Do not use this flag when supplying a list of interfaces"
),
)
@click.option(
"-v", "--verbose", is_flag=True, default=False, help="Run with verbose output"
)
@click.argument("interfaces", nargs=-1)
def cli(
config: dict,
    juniper: Optional[str],
    nokia: Optional[str],
output: str,
all_: bool,
verbose: bool,
interfaces: List[str],
):
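    """Collect interface statistics from a router and write them as data points."""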
if not (interfaces or all_):
# Do nothing if no interfaces are specified
return
if interfaces and all_:
raise click.BadParameter("Do not supply both 'interfaces' and '--all'")
if not (juniper or nokia) or (juniper and nokia):
raise click.BadParameter(
"Supply either a '--juniper' or '--nokia' router, but not both"
)
router_fqdn = juniper or nokia
if juniper:
processor = JuniperRouterProcessor(router_fqdn, config)
else:
processor = NokiaRouterProcessor(router_fqdn, config)
error_counter = setup_logging(debug=verbose)
exception_queue = Queue()
    thread = threading.Thread(
        target=main,
        args=(
            exception_queue,
            processor,
            interfaces if interfaces else ALL_,
            OutputMethod.from_string(output.lower()),
        ),
    )
thread.daemon = True
thread.start()
thread.join(timeout=120)
if thread.is_alive() or not exception_queue.empty():
if thread.is_alive():
logger.error("Thread timed out")
frames = sys._current_frames()
for thread_id, frame in frames.items():
if thread_id == thread.ident:
logger.error("Thread stack trace:")
for line in traceback.format_stack(frame):
logger.error(line.strip())
logger.error("-- Thread stack trace end --")
else:
            logger.error(
                f"Error while processing {processor.name.capitalize()} "
                f"router {router_fqdn}:"
            )
exception = exception_queue.get()
logger.error(exception)
raise click.exceptions.Exit(2)
if error_counter.count:
# Exit code 1 indicates WARNING in Sensu
raise click.ClickException(
"Errors were encountered while processing interface stats"
)
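

# Example invocation when this file is run directly (illustrative values for the
# config path and router fqdn):
#
#   python cli.py --config config.json --juniper rt1.example.org --all -o stdout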


if __name__ == "__main__":
cli()