import os
import re
import argparse
import sys
import pandas as pd  # https://pandas.pydata.org
from datetime import datetime, timezone, timedelta
from pathlib import Path
import numpy as np

###############################################################################
# INPUT DATA SECTION
###############################################################################

# make sure this matches the What-If Scenario runner script
RAW_REPORT_DIRECTORY = "/Users/daniel.verlouw/Desktop/rawreports"
RAW_REPORT_FILE_PREFIX = "raw_capacityreport_"
RAW_REPORT_FILE_SUFFIX = "csv"

CONSOLIDATED_REPORT_DIRECTORY = "/Users/daniel.verlouw/Desktop/consolidatedreports"
CONSOLIDATED_REPORT_FILE_PREFIX = "consolidated_capacityreport_"
CONSOLIDATED_REPORT_FILE_SUFFIX_RAW = "csv"
CONSOLIDATED_REPORT_FILE_SUFFIX_HUMAN = "txt"

# Scenarios triggering a 'MUST UPGRADE' if threshold is exceeded
MUST_UPGRADE_SCENARIOS = ["normal", "onelinkfail", "onenodefail"]

# Scenarios triggering a 'SHOULD UPGRADE' if threshold is exceeded
SHOULD_UPGRADE_SCENARIOS = ["twolinkfail", "nodelinkfail"]

# ISO 8601 basic timestamp format (YYYYMMDDTHHMMZ)
ISO8601_FORMAT = '%Y%m%dT%H%MZ'
ISO8601_REGEXP = r'\d{4}\d{2}\d{2}T\d{2}\d{2}Z'

###############################################################################
# RAW CONSOLIDATED REPORT
###############################################################################

# --- Helper function to get the row of the max usage for a given column,
#     handling empty/missing columns ---
def get_max_usage_row(group, usage_col):
    """
    Returns a single row (as a Series) within `group` that has the maximum value
    in `usage_col`. If `usage_col` does not exist or is entirely NaN, returns None.
    """
    # If the column doesn't exist or has all null values, return None
    if usage_col not in group.columns or group[usage_col].dropna().empty:
        return None

    max_val = group[usage_col].max()
    # Filter to rows where usage_col equals the maximum value.
    max_rows = group[group[usage_col] == max_val]
    # Return only the first row among those with the maximum value.
    return max_rows.iloc[0]
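
# Illustrative behaviour (not executed): for a group whose 'NormalUsageGbps'
# column holds [10.0, 40.0, 40.0], get_max_usage_row(group, 'NormalUsageGbps')
# returns the first of the two rows holding 40.0; if the column is missing or
# entirely NaN it returns None, and callers fall back to NaN/NaT defaults.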

def extract_usage_details(group):
    """
    For a single group of rows (all links with the same ID), find the row with
    the max usage for each usage field (Gbps) and extract the relevant columns.
    Booleans are set to True if at least one row in the group is True.
    """
    # We'll create a dict to hold the final data for this ID.
    out = {}

    # Basic info columns (these are assumed to be consistent within the group)
    first_row = group.iloc[0]
    out['ID'] = first_row.get('ID', None)
    out['NodeA'] = first_row.get('NodeA', None)
    out['NodeB'] = first_row.get('NodeB', None)
    out['CapacityGbps'] = first_row.get('CapacityGbps', None)
    out['ConfNormalThrPct'] = first_row.get('ConfNormalThrPct', None)
    out['ConfNormalThrGbps'] = first_row.get('ConfNormalThrGbps', None)
    out['ConfFailThrPct'] = first_row.get('ConfFailThrPct', None)
    out['ConfFailThrGbps'] = first_row.get('ConfFailThrGbps', None)

    # Normal usage
    normal_row = get_max_usage_row(group, 'NormalUsageGbps')
    out['NormalEnabled'] = bool(group['NormalEnabled'].any()) if 'NormalEnabled' in group.columns else False
    out['NormalUsagePct'] = normal_row.get('NormalUsagePct', np.nan) if normal_row is not None else np.nan
    out['NormalUsageGbps'] = normal_row.get('NormalUsageGbps', np.nan) if normal_row is not None else np.nan
    out['NormalUsageExceed'] = bool(group['NormalUsageExceed'].any()) if 'NormalUsageExceed' in group.columns else False
    out['NormalDateTime'] = normal_row.get('NormalDateTime', pd.NaT) if normal_row is not None else pd.NaT

    # 1 Link Fail usage
    one_link_row = get_max_usage_row(group, '1LinkFailUsageGbps')
    out['1LinkFailEnabled'] = bool(group['1LinkFailEnabled'].any()) if '1LinkFailEnabled' in group.columns else False
    out['1LinkFailScenario'] = one_link_row.get('1LinkFailScenario', None) if one_link_row is not None else None
    out['1LinkFailUsagePct'] = one_link_row.get('1LinkFailUsagePct', np.nan) if one_link_row is not None else np.nan
    out['1LinkFailUsageGbps'] = one_link_row.get('1LinkFailUsageGbps', np.nan) if one_link_row is not None else np.nan
    out['1LinkFailUsageExceed'] = bool(group['1LinkFailUsageExceed'].any()) if '1LinkFailUsageExceed' in group.columns else False
    out['1LinkFailUsageTime'] = one_link_row.get('1LinkFailUsageTime', pd.NaT) if one_link_row is not None else pd.NaT

    # 2 Link Fail usage
    two_link_row = get_max_usage_row(group, '2LinkFailUsageGbps')
    out['2LinkFailEnabled'] = bool(group['2LinkFailEnabled'].any()) if '2LinkFailEnabled' in group.columns else False
    out['2LinkFailScenario'] = two_link_row.get('2LinkFailScenario', None) if two_link_row is not None else None
    out['2LinkFailUsagePct'] = two_link_row.get('2LinkFailUsagePct', np.nan) if two_link_row is not None else np.nan
    out['2LinkFailUsageGbps'] = two_link_row.get('2LinkFailUsageGbps', np.nan) if two_link_row is not None else np.nan
    out['2LinkFailUsageExceed'] = bool(group['2LinkFailUsageExceed'].any()) if '2LinkFailUsageExceed' in group.columns else False
    out['2LinkFailUsageTime'] = two_link_row.get('2LinkFailUsageTime', pd.NaT) if two_link_row is not None else pd.NaT

    # 1 Node Fail usage
    one_node_row = get_max_usage_row(group, '1NodeFailUsageGbps')
    out['1NodeFailEnabled'] = bool(group['1NodeFailEnabled'].any()) if '1NodeFailEnabled' in group.columns else False
    out['1NodeFailScenario'] = one_node_row.get('1NodeFailScenario', None) if one_node_row is not None else None
    out['1NodeFailUsagePct'] = one_node_row.get('1NodeFailUsagePct', np.nan) if one_node_row is not None else np.nan
    out['1NodeFailUsageGbps'] = one_node_row.get('1NodeFailUsageGbps', np.nan) if one_node_row is not None else np.nan
    out['1NodeFailUsageExceed'] = bool(group['1NodeFailUsageExceed'].any()) if '1NodeFailUsageExceed' in group.columns else False
    out['1NodeFailUsageTime'] = one_node_row.get('1NodeFailUsageTime', pd.NaT) if one_node_row is not None else pd.NaT

    # Node+1 Link Fail usage
    node_plus_link_row = get_max_usage_row(group, 'Node+1LinkFailUsageGbps')
    out['Node+1LinkFailEnabled'] = bool(group['Node+1LinkFailEnabled'].any()) if 'Node+1LinkFailEnabled' in group.columns else False
    out['Node+1LinkFailScenario'] = node_plus_link_row.get('Node+1LinkFailScenario', None) if node_plus_link_row is not None else None
    out['Node+1LinkFailUsagePct'] = node_plus_link_row.get('Node+1LinkFailUsagePct', np.nan) if node_plus_link_row is not None else np.nan
    out['Node+1LinkFailUsageGbps'] = node_plus_link_row.get('Node+1LinkFailUsageGbps', np.nan) if node_plus_link_row is not None else np.nan
    out['Node+1LinkFailUsageExceed'] = bool(group['Node+1LinkFailUsageExceed'].any()) if 'Node+1LinkFailUsageExceed' in group.columns else False
    out['Node+1LinkFailUsageTime'] = node_plus_link_row.get('Node+1LinkFailUsageTime', pd.NaT) if node_plus_link_row is not None else pd.NaT

    # Finally, consolidated upgrade flags
    out['MustUpgrade'] = bool(group['MustUpgrade'].any()) if 'MustUpgrade' in group.columns else False
    out['ShouldUpgrade'] = bool(group['ShouldUpgrade'].any()) if 'ShouldUpgrade' in group.columns else False

    return pd.Series(out)
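
# Usage sketch (illustrative only): main() below calls this once per link ID,
# e.g. extract_usage_details(concat_df[concat_df['ID'] == link_id]), producing
# one Series per link with the highest usage seen for each scenario and the
# OR-ed boolean flags across all raw reports in the timeframe.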

###############################################################################
# HUMAN READABLE CONSOLIDATED REPORT
###############################################################################

def build_human_report(df_raw):
    df_human = df_raw.copy()

    # Helper formatting functions
    def mark_upgrade(scenario, exceed):
        if scenario in MUST_UPGRADE_SCENARIOS and exceed:
            return " (**)"
        elif scenario in SHOULD_UPGRADE_SCENARIOS and exceed:
            return " (*)"
        else:
            return ""

    def normal_thr(row):
        return f"{row['ConfNormalThrPct']*100:.0f}% / {row['ConfNormalThrGbps']:.0f}G"

    def fail_thr(row):
        return f"{row['ConfFailThrPct']*100:.0f}% / {row['ConfFailThrGbps']:.0f}G"

    def normal_usage(row):
        return f"{row['NormalUsagePct']*100:.0f}% / {row['NormalUsageGbps']:.1f}G" + mark_upgrade("normal", row['NormalUsageExceed'])

    def onelinkfail_usage(row):
        return f"{row['1LinkFailUsagePct']*100:.0f}% / {row['1LinkFailUsageGbps']:.1f}G" + mark_upgrade("onelinkfail", row['1LinkFailUsageExceed'])

    def twolinkfail_usage(row):
        return f"{row['2LinkFailUsagePct']*100:.0f}% / {row['2LinkFailUsageGbps']:.1f}G" + mark_upgrade("twolinkfail", row['2LinkFailUsageExceed'])

    def onenodefail_usage(row):
        return f"{row['1NodeFailUsagePct']*100:.0f}% / {row['1NodeFailUsageGbps']:.1f}G" + mark_upgrade("onenodefail", row['1NodeFailUsageExceed'])

    def nodelinkfail_usage(row):
        return f"{row['Node+1LinkFailUsagePct']*100:.0f}% / {row['Node+1LinkFailUsageGbps']:.1f}G" + mark_upgrade("nodelinkfail", row['Node+1LinkFailUsageExceed'])

    def upgrade(row):
        return "MUST (**)" if row['MustUpgrade'] else ("SHOULD (*)" if row['ShouldUpgrade'] else "NO")

    df_human['ConfNormalThr'] = df_human.apply(normal_thr, axis=1)
    df_human['ConfFailThr'] = df_human.apply(fail_thr, axis=1)
    df_human['NormalUsage'] = df_human.apply(normal_usage, axis=1)
    df_human['1LinkFailUsage'] = df_human.apply(onelinkfail_usage, axis=1)
    df_human['2LinkFailUsage'] = df_human.apply(twolinkfail_usage, axis=1)
    df_human['1NodeFailUsage'] = df_human.apply(onenodefail_usage, axis=1)
    df_human['Node+1LinkFailUsage'] = df_human.apply(nodelinkfail_usage, axis=1)
    df_human['Upgrade'] = df_human.apply(upgrade, axis=1)

    # Drop unused columns
    df_human.drop(['ConfNormalThrPct', 'ConfNormalThrGbps'], axis=1, inplace=True)
    df_human.drop(['ConfFailThrPct', 'ConfFailThrGbps'], axis=1, inplace=True)
    df_human.drop(['NormalUsagePct', 'NormalUsageGbps', 'NormalUsageExceed'], axis=1, inplace=True)
    df_human.drop(['1LinkFailUsagePct', '1LinkFailUsageGbps', '1LinkFailUsageExceed', '1LinkFailUsageTime'], axis=1, inplace=True)
    df_human.drop(['2LinkFailUsagePct', '2LinkFailUsageGbps', '2LinkFailUsageExceed', '2LinkFailUsageTime'], axis=1, inplace=True)
    df_human.drop(['1NodeFailUsagePct', '1NodeFailUsageGbps', '1NodeFailUsageExceed', '1NodeFailUsageTime'], axis=1, inplace=True)
    df_human.drop(['Node+1LinkFailUsagePct', 'Node+1LinkFailUsageGbps', 'Node+1LinkFailUsageExceed', 'Node+1LinkFailUsageTime'], axis=1, inplace=True)
    df_human.drop(['MustUpgrade', 'ShouldUpgrade'], axis=1, inplace=True)

    # Replace NaN and NaT values with "N/A"
    df_human['NormalDateTime'] = df_human['NormalDateTime'].astype(object)
    df_human.fillna("N/A", inplace=True)

    return df_human[['NodeA', 'NodeB', 'CapacityGbps',
                     'ConfNormalThr', 'ConfFailThr',
                     'NormalUsage', 'NormalDateTime',
                     '1LinkFailScenario', '1LinkFailUsage',
                     '2LinkFailScenario', '2LinkFailUsage',
                     '1NodeFailScenario', '1NodeFailUsage',
                     'Node+1LinkFailScenario', 'Node+1LinkFailUsage',
                     'Upgrade']]
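
# Formatting example (hypothetical values): a link with ConfNormalThrPct=0.40
# and ConfNormalThrGbps=40 renders ConfNormalThr as "40% / 40G"; a normal-scenario
# usage of 0.55 / 55.0 Gbps that exceeds its threshold renders as
# "55% / 55.0G (**)", since "normal" is listed in MUST_UPGRADE_SCENARIOS.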

###############################################################################
# FILE FUNCTIONS
###############################################################################

def find_files_by_timeframe(directory, prefix, suffix, start_datetime, end_datetime):
    # List all raw reports in directory
    all_raw_reports = [
        file for file in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, file))
        and file.startswith(prefix)
        and file.endswith(suffix)
        and re.search(ISO8601_REGEXP, file)
    ]

    # Filter to files whose embedded timestamp falls within the specified datetime range
    matching_files = []
    for file in all_raw_reports:
        match = re.search(ISO8601_REGEXP, file)
        file_date = datetime.strptime(match.group(), ISO8601_FORMAT).replace(tzinfo=timezone.utc)
        if start_datetime <= file_date <= end_datetime:
            matching_files.append(os.path.join(directory, file))

    return matching_files


def store_consolidated(df_consolidated, directory, prefix, suffix):
    path = Path(directory)
    path.mkdir(parents=True, exist_ok=True)  # Create directory if it doesn't exist

    # Create an ISO 8601 basic-format, UTC-timestamped filename
    timestamp = datetime.now(timezone.utc).strftime(ISO8601_FORMAT)
    filename = f'{prefix}{timestamp}.{suffix}'

    if suffix == "csv":
        df_consolidated.to_csv(os.path.join(path, filename), sep=',', encoding='utf-8',
                               date_format=ISO8601_FORMAT, header=True)

    elif suffix == "txt":
        markdown = df_consolidated.to_markdown(headers='keys', tablefmt='psql')
        # Write the markdown string to a file
        with open(os.path.join(path, filename), "w") as file:
            file.write(markdown)
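
# Filename examples (timestamps illustrative): find_files_by_timeframe() picks up
# raw reports such as "raw_capacityreport_20250102T0300Z.csv", while
# store_consolidated() writes e.g. "consolidated_capacityreport_20250102T0305Z.csv"
# (or ".txt"), timestamped with the UTC time the consolidation ran.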

###############################################################################
# MAIN
###############################################################################

def main():
    # Parse commandline arguments
    parser = argparse.ArgumentParser(description='Script usage:')
    parser.add_argument('--daily', action='store_true', help='Create daily report (past day)')
    parser.add_argument('--weekly', action='store_true', help='Create weekly report (past week)')
    parser.add_argument('--monthly', action='store_true', help='Create monthly report (past month)')
    parser.add_argument('--quarterly', action='store_true', help='Create quarterly report (past quarter)')
    parser.add_argument('--yearly', action='store_true', help='Create yearly report (past year)')

    if len(sys.argv) == 1:
        # No arguments were provided; print help message.
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    today = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
    first_day_of_current_month = today.replace(day=1)

    if args.daily:
        start_datetime = today - timedelta(days=1)
        end_datetime = today

    elif args.weekly:
        start_datetime = today - timedelta(weeks=1)
        end_datetime = today

    elif args.monthly:
        # First day of last month
        start_datetime = (first_day_of_current_month - timedelta(days=1)).replace(day=1)
        # First day of current month
        end_datetime = first_day_of_current_month

    elif args.quarterly:
        # Approximates the quarter as 90 days before the start of the current month.
        start_datetime = (first_day_of_current_month - timedelta(days=1)).replace(day=1) - timedelta(days=(first_day_of_current_month.month - 1) % 3 * 30)
        end_datetime = (first_day_of_current_month - timedelta(days=1)).replace(day=1)

    elif args.yearly:
        # First day of the previous year
        start_datetime = today.replace(year=today.year - 1, month=1, day=1)
        end_datetime = today.replace(year=today.year, month=1, day=1)

    matching_files = find_files_by_timeframe(RAW_REPORT_DIRECTORY, RAW_REPORT_FILE_PREFIX,
                                             RAW_REPORT_FILE_SUFFIX, start_datetime, end_datetime)

    if len(matching_files) > 0:
        print(f"Generating consolidated report for {len(matching_files)} raw reports for timeframe {start_datetime} through {end_datetime}")

        # List of columns that should be parsed as dates from CSV
        date_columns = ['NormalDateTime', '1LinkFailUsageTime', '2LinkFailUsageTime',
                        '1NodeFailUsageTime', 'Node+1LinkFailUsageTime']

        # Read and concat CSVs
        dataframes = [pd.read_csv(file, sep=',', encoding='utf-8', parse_dates=date_columns,
                                  date_format=ISO8601_FORMAT, float_precision='round_trip')
                      for file in matching_files]
        concat_df = pd.concat(dataframes)

        # Walk over the results for each link and extract and store the highest usage for each scenario
        results = []
        for id_val, group in concat_df.groupby('ID'):
            details = extract_usage_details(group)
            # Overwrite ID with the group key to be sure
            details['ID'] = id_val
            results.append(details)

        consolidated_raw = pd.DataFrame(results)
        consolidated_raw.set_index("ID", inplace=True)
        store_consolidated(consolidated_raw, CONSOLIDATED_REPORT_DIRECTORY,
                           CONSOLIDATED_REPORT_FILE_PREFIX, CONSOLIDATED_REPORT_FILE_SUFFIX_RAW)

        consolidated_human = build_human_report(consolidated_raw)
        store_consolidated(consolidated_human, CONSOLIDATED_REPORT_DIRECTORY,
                           CONSOLIDATED_REPORT_FILE_PREFIX, CONSOLIDATED_REPORT_FILE_SUFFIX_HUMAN)

    else:
        print(f"No raw files found for timeframe {start_datetime} through {end_datetime}")


if __name__ == "__main__":
    main()
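
# Typical invocations (illustrative; substitute the actual script filename):
#   python consolidate_capacity_reports.py --daily     # past day's raw reports
#   python consolidate_capacity_reports.py --monthly   # previous calendar month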