Verified Commit b9543578 authored by Karel van Klink

Reformat files

parent 6a43fc36
"""GÉANT Capacity Planner."""
"""Generate a capacity report."""
import argparse
import os
import re
import sys
from datetime import UTC, datetime, timedelta
from logging import getLogger
from pathlib import Path
import numpy as np
import pandas as pd # https://pandas.pydata.org
import pandas as pd
logger = getLogger(__name__)
###############################################################################
# INPUT DATA SECTION
###############################################################################
# make sure this matches with the What-If Scenario runner script
RAW_REPORT_DIRECTORY = "/Users/daniel.verlouw/Desktop/rawreports"
@@ -32,14 +33,11 @@ ISO8601_FORMAT = "%Y%m%dT%H%MZ"
ISO8601_REGEXP = r"\d{4}\d{2}\d{2}T\d{2}\d{2}Z"
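# Illustration of the timestamp convention (the example value is made up):
# datetime.strptime("20250101T0830Z", ISO8601_FORMAT) -> datetime(2025, 1, 1, 8, 30),
# and a hypothetical file name such as "capacityreport_20250101T0830Z.csv" matches ISO8601_REGEXP.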
###############################################################################
# RAW CONSOLIDATED REPORT
###############################################################################
# --- Helper function to get the row of the max usage for a given column, handling empty/missing columns ---
def get_max_usage_row(group, usage_col):
"""Returns a single row (as a Series) within `group` that has the maximum value in `usage_col`.
"""Given a list of rows, return the row with the highest usage.
Returns a single row (as a Series) within `group` that has the maximum value in `usage_col`.
If `usage_col` does not exist or is entirely NaN, returns None.
"""
# If the column doesn't exist or has all null values, return None
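# A minimal sketch of what this helper can look like with pandas (illustrative only;
# the actual body is elided in this hunk):
#
#     if usage_col not in group.columns or group[usage_col].isna().all():
#         return None
#     return group.loc[group[usage_col].idxmax()]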
@@ -53,9 +51,10 @@ def get_max_usage_row(group, usage_col):
def extract_usage_details(group):
"""For a single group of rows (all links with the same ID), find the row with the max usage for each usage field (Gbps)
and extract the relevant columns.
Booleans are set to True if at least one row in the group is True.
"""Extract usage details.
For a single group of rows (all links with the same ID), find the row with the max usage for each usage field (Gbps)
and extract the relevant columns. Booleans are set to True if at least one row in the group is True.
"""
# We'll create a dict to hold the final data for this ID.
out = {}
@@ -140,12 +139,8 @@ def extract_usage_details(group):
return pd.Series(out)
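# Hedged usage sketch -- the grouping column name "link_id" is an assumption:
# df_consolidated = df_raw.groupby("link_id").apply(extract_usage_details).reset_index()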
###############################################################################
# HUMAN READABLE CONSOLIDATED REPORT
###############################################################################
def build_human_report(df_raw):
"""Build a human-readable report."""
df_human = df_raw.copy()
# Helper formatting functions
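# A plausible example of such a formatter (illustrative, not the elided implementation):
# def _format_gbps(value):
#     return "N/A" if pd.isna(value) else f"{value:,.1f} Gbps"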
@@ -245,35 +240,32 @@ def build_human_report(df_raw):
]
###############################################################################
# FILE FUNCTIONS
###############################################################################
def find_files_by_timeframe(directory, prefix, suffix, start_datetime, end_datetime):
"""Find all files that fall within a given timeframe."""
# List all raw reports in directory
all_raw_reports = [
file
for file in os.listdir(directory)
if os.path.isfile(os.path.join(directory, file))
and file.startswith(prefix)
and file.endswith(suffix)
and re.search(ISO8601_REGEXP, file)
for file in Path(directory).iterdir()
if file.is_file()
and file.name.startswith(prefix)
and file.name.endswith(suffix)
and re.search(ISO8601_REGEXP, file.name)
]
# Filter to files that match the timestamp pattern within the specified datetime range
matching_files = []
for file in all_raw_reports:
match = re.search(ISO8601_REGEXP, file)
match = re.search(ISO8601_REGEXP, file.name)
file_date = datetime.strptime(match.group(), ISO8601_FORMAT).replace(tzinfo=UTC)
if start_datetime <= file_date <= end_datetime:
matching_files.append(os.path.join(directory, file))
matching_files.append(file)
return matching_files
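# Hedged usage example -- the prefix and suffix values are assumptions:
# end_datetime = datetime.now(tz=UTC)
# start_datetime = end_datetime - timedelta(days=1)
# files = find_files_by_timeframe(RAW_REPORT_DIRECTORY, "capacityreport_", "csv", start_datetime, end_datetime)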
def store_consolidated(df_consolidated, directory, prefix, suffix):
"""Store consolidated results in a file."""
path = Path(directory)
path.mkdir(parents=True, exist_ok=True) # Create directory if it doesn't exist
@@ -283,22 +275,17 @@ def store_consolidated(df_consolidated, directory, prefix, suffix):
if suffix == "csv":
df_consolidated.to_csv(
os.path.join(path, filename), sep=",", encoding="utf-8", date_format=ISO8601_FORMAT, header=True
Path(path / filename), sep=",", encoding="utf-8", date_format=ISO8601_FORMAT, header=True
)
elif suffix == "txt":
markdown = df_consolidated.to_markdown(headers="keys", tablefmt="psql")
# Write the markdown string to a file
with open(os.path.join(path, filename), "w") as file:
file.write(markdown)
###############################################################################
# MAIN
###############################################################################
Path(path / filename).write_text(markdown)
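# Hedged usage example -- the directory and prefix values are assumptions:
# store_consolidated(build_human_report(df_raw), "/tmp/consolidatedreports", "consolidated_", "txt")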
def main():
"""Main method for running the capacity planner."""
# Parse commandline arguments
parser = argparse.ArgumentParser(description="Script usage:")
parser.add_argument("--daily", action="store_true", help="Create daily report (past day)")
@@ -343,9 +330,11 @@ def main():
)
if len(matching_files) > 0:
print(
f"Generating consolidated report for {len(matching_files)} raw reports for timeframe {start_datetime} through {end_datetime}"
msg = (
f"Generating consolidated report for {len(matching_files)} raw reports for timeframe {start_datetime} "
f"through {end_datetime}"
)
logger.info(msg)
# List of columns that should be parsed as dates from CSV
date_columns = [
@@ -396,7 +385,8 @@
)
else:
print(f"No raw files found for timeframe {start_datetime} through {end_datetime}")
msg = f"No raw files found for timeframe {start_datetime} through {end_datetime}"
logger.warning(msg)
if __name__ == "__main__":
......
"""Different services that the capacity planner interacts with."""
"""Interactions with Kentik through their API."""
import os
import requests
@@ -24,16 +26,14 @@ EGRESS_DIMENSION = "i_ult_exit_site"
def _api_query(payload):
# Headers for authentication
headers = {"Content-Type": "application/json", "X-CH-Auth-Email": API_EMAIL, "X-CH-Auth-API-Token": API_TOKEN}
response = requests.post(API_URL, headers=headers, json=payload)
response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
if response.status_code == 200:
return response.json()
print(f"Error fetching data from Kentik API: {response.status_code} - {response.text}")
return None
response.raise_for_status()
return response.json()
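# With raise_for_status(), callers see API failures as exceptions; a hedged handling sketch:
# try:
#     data = _api_query(payload)
# except requests.HTTPError:
#     data = None  # or log / retry, depending on the caller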
def fetch_kentik_traffic_matrix():
# JSON query payload
"""Fetch a traffic matrix from Kentik."""
payload = {
"version": 4,
"queries": [
@@ -44,12 +44,10 @@ def fetch_kentik_traffic_matrix():
"all_devices": True,
"aggregateTypes": ["max_in_bits_per_sec"],
"depth": 350,
"topx": 350, # 350=max supported by Kentik
"topx": 350,
"device_name": [],
"fastData": "Auto",
"lookback_seconds": 60 * KENTIK_REPORTING_PERIOD,
# "starting_time": null,
# "ending_time": null,
"matrixBy": [],
"metric": ["in_bytes"],
"minsPolling": KENTIK_FLOW_AGGR_WINDOW,
@@ -96,8 +94,7 @@ def fetch_kentik_traffic_matrix():
],
}
response = _api_query(payload)
return response
return _api_query(payload)
###############################################################################
@@ -106,9 +103,9 @@ def fetch_kentik_traffic_matrix():
def kentik_to_traffic_matrices(json_data, nodes):
"""Convert the given JSON structure returned by Kentik into a dictionary
keyed by timestamp. For each timestamp, we store a nested dict of:
traffic_matrices[timestamp][ingress][egress] = traffic_rate_Mbps
"""Convert the given JSON structure returned by Kentik into a dictionary keyed by timestamp.
For each timestamp, we store a nested dict of: traffic_matrices[timestamp][ingress][egress] = traffic_rate_Mbps
"""
# We'll gather all flows in the JSON
data_entries = json_data["results"][0]["data"]
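# Hedged sketch of the nested-dict construction (the per-entry key names are assumptions,
# not necessarily the Kentik response fields used in the elided body):
# traffic_matrices = {}
# for entry in data_entries:
#     ts = entry["timestamp"]
#     ingress, egress = entry[INGRESS_DIMENSION], entry[EGRESS_DIMENSION]
#     traffic_matrices.setdefault(ts, {}).setdefault(ingress, {})[egress] = entry["mbps"]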
......
"""Utilities for capacity-planner."""
# (A) Define each core link:
# ( linkID, nodeA, nodeB, igp_metric, capacity, srlg_list, [normal_threshold], [failure_threshold] )
# where:
# linkID: network-wide unique numeric ID (e.g. 1001)
# nodeA, nodeB: core link endpoints
# igp_metric: IGP cost/distance
# capacity: full-duplex link capacity in Gbps
# srlg_list: list of Shared Risk Link Group (SRLG) names (or empty)
# normal_threshold: fraction for normal usage. If omitted default is used
# failure_threshold: fraction for usage under failure. If omitted default is used
"""Define each core link in a topology.
Each link is a tuple shaped like:
( linkID, nodeA, nodeB, igp_metric, capacity, srlg_list, [normal_threshold], [failure_threshold] )
where:
linkID: network-wide unique numeric ID (e.g. 1001)
nodeA, nodeB: core link endpoints
igp_metric: IGP cost/distance
capacity: full-duplex link capacity in Gbps
srlg_list: list of Shared Risk Link Group (SRLG) names (or empty)
normal_threshold: fraction for normal usage. If omitted default is used
failure_threshold: fraction for usage under failure. If omitted default is used
"""
CORELINKS = [
(1, "AMS", "FRA", 2016, 800, []),
(2, "AMS", "LON", 1428, 800, []),
@@ -43,22 +48,12 @@ CORELINKS = [
(32, "ATH2", "MIL2", 25840, 100, ["MIL2-PRE"]),
(33, "ATH2", "THE", 8200, 100, []),
(34, "SOF", "THE", 5800, 100, []),
# ("COP", "HAM", 480, 400, []),
# ("COP", "STO", 600, 400, []),
# ("HEL", "STO", 630, 400, []),
(35, "RIG", "TAR", 2900, 100, []),
(36, "KAU", "POZ", 10050, 100, []),
# ("BEL", "SOF", 444, 400, []),
# ("BEL", "ZAG", 528, 400, []),
(37, "ZAG", "SOF", 9720, 200, []),
(38, "BRA", "BUD", 50000, 100, ["BRA-BUD"]),
(39, "COR", "LON2", 7160, 100, ["COR-LON2"]),
# ("HEL", "TAR", 227, 400, []),
(40, "KAU", "RIG", 4500, 100, []),
# ("BRU", "LUX", 380, 400, []),
# ("FRA", "LUX", 312, 400, []),
# ("RIG", "STO", 400, 400, []),
# ("COP", "POZ", 750, 400, []),
(41, "COR", "PAR", 12549, 100, ["COR-LON2"]),
(42, "KIE", "POZ", 50000, 100, []),
(43, "CHI", "BUC", 50000, 40, []),
@@ -111,25 +106,21 @@ NODES = [
NODE_FAILOVER_RATIOS = {
"AMS": {"LON": 0.6, "FRA": 0.4},
"ATH": {"THE": 0.5, "MAR": 0.5},
# "BEL": {"ZAG": 1.0 },
"BIL": {"MAD": 1.0},
"BRA": {"VIE": 1.0},
"BRU": {"AMS": 1.0},
"BUC": {"VIE": 0.5, "SOF": 0.5},
"BUD": {"ZAG": 1.0},
# "COP": {"HAM": 0.5, "STO": 0.5 },
"COR": {"DUB": 1.0},
"DUB": {"COR": 1.0},
"FRA": {"HAM": 0.4, "AMS": 0.4, "LON": 0.2},
"GEN": {"PAR": 0.6, "MIL2": 0.4},
"HAM": {"FRA": 0.5, "POZ": 0.2, "LON": 0.3},
# "HEL": {"TAR": 0.3, "HAM": 0.7 },
"KAU": {"RIG": 1.0},
"LIS": {"POR": 1.0},
"LJU": {"ZAG": 1.0},
"LON": {"AMS": 0.4, "HAM": 0.2, "FRA": 0.4},
"LON2": {"LON": 1.0},
# "LUX": {"BRU": 1.0 },
"MAD": {"BIL": 1.0},
"MAR": {"MIL2": 0.6, "ATH": 0.4},
"MIL2": {"GEN": 0.3, "MAR": 0.3, "VIE": 0.3},
@@ -139,7 +130,6 @@ NODE_FAILOVER_RATIOS = {
"PRA": {"VIE": 1.0},
"RIG": {"KAU": 1.0},
"SOF": {"THE": 0.5, "BUC": 0.5},
# "STO": {"COP": 0.5, "HEL": 0.5 },
"TAR": {"RIG": 1.0},
"THE": {"ATH": 0.5, "SOF": 0.5},
"VIE": {"MIL2": 0.6, "PRA": 0.2, "BUC": 0.2},
......
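# Hedged sketch of how a failover ratio table like the one above can be applied
# (variable names are illustrative):
# failed_node = "AMS"
# for backup_node, ratio in NODE_FAILOVER_RATIOS[failed_node].items():
#     redistributed_demand[backup_node] = ratio * original_demand[failed_node]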
This diff is collapsed.
@@ -25,6 +25,7 @@ enable_error_code = "ignore-without-code"
extend-exclude = [
"htmlcov",
"docs",
"capacity_planner/alembic"
]
target-version = "py312"
line-length = 120
......