#!/bin/env python3
"""Parse the ECCS uWSGI log and print per-day usage statistics.

Each relevant log line is expected to hold at least four "|"-separated
fields, in this order: ip | [date] | http-method | url.
Lines without a "|" are ignored.

The result printed by ``main`` is a dictionary keyed by day::

    {
        "2023-08-07": { ...a copy of eccs_json_format with counters... },
        ...
    }
"""

import copy
import logging
import json
from datetime import datetime

# Template of the statistics collected for a single day.
#
# NOTE(review): the entries under "CountByRegistrar", "CountByIPFromAPI" and
# "countByHour" are illustrative placeholders showing the intended layout;
# nothing in this script fills them in yet, so they stay at zero in the output.
# TODO(review): confirm whether the per-status breakdown ("Status") is
# actually needed in every section.
eccs_json_format = {
    "IPUnique": 0,         # Distinct client IPs found in the log
    "IdPUnique": 0,        # Distinct values of the "idp=" parameter (API or WEB)
    "RequestAPI": 0,       # Requests whose URL contains "/eccs/api/"
    "RequestWEB": 0,       # Requests whose URL does not contain "/eccs/api/"
    "RegistrarUnique": 0,  # Distinct values of the "reg_auth=" parameter (API or WEB)
    "Status": {            # Requests carrying the "status=" parameter (API or WEB)
        "OK": 0,
        "ERROR": 0,
        "UNKNOWN": 0,
        "DISABLED": 0,
    },
    # Could be useful to understand who uses the API/WEB (by reg_auth).
    "CountByRegistrar": {
        "http://www.idem.garr.it/": {
            "IPUnique": 0,
            "IdPUnique": 0,
            "RequestAPI": 0,
            "RequestWEB": 0,
            "Status": {
                "OK": 0,
                "ERROR": 0,
                "UNKNOWN": 0,
                "DISABLED": 0,
            },
        },
        "http://www.srce.hr": {
            "IPUnique": 0,
            "IdPUnique": 0,
            "RequestAPI": 0,
            "RequestWEB": 0,
            "Status": {
                "OK": 0,
                "ERROR": 0,
                "UNKNOWN": 0,
                "DISABLED": 0,
            },
        },
    },
    # Could be useful to understand who uses the API/WEB (by IP).
    "CountByIPFromAPI": {
        "4.4.4.4": {
            "IdPUnique": 0,
            "RegistrarUnique": 0,
            "Status": {
                "OK": 0,
                "ERROR": 0,
                "UNKNOWN": 0,
                "DISABLED": 0,
            },
        },
        "8.8.8.8": {
            "IdPUnique": 0,
            "RegistrarUnique": 0,
            "Status": {
                "OK": 0,
                "ERROR": 0,
                "UNKNOWN": 0,
                "DISABLED": 0,
            },
        },
    },
    # Could be useful to understand when federations run automatic scripts.
    "countByHour": {
        "17": {
            "IPUnique": 0,
            "IdPUnique": 0,
            "RequestAPI": 0,
            "RequestWEB": 0,
            "Status": {
                "OK": 0,
                "ERROR": 0,
                "UNKNOWN": 0,
                "DISABLED": 0,
            },
        },
        "23": {
            "IPUnique": 0,
            "IdPUnique": 0,
            "RequestAPI": 0,
            "RequestWEB": 0,
            "Status": {
                "OK": 0,
                "ERROR": 0,
                "UNKNOWN": 0,
                "DISABLED": 0,
            },
        },
    },
}

# Field names assigned, in order, to the "|"-separated parts of a log line.
LOG_FIELDS = ("ip", "date", "http-method", "url")

# Timestamp layout used in the log and the day format used as statistics key.
LOG_DATE_FORMAT = '[%a %b %d %H:%M:%S %Y]'
DAY_FORMAT = '%Y-%m-%d'


def get_url_info(url, parameter):
    """Return the raw value of a query-string parameter of ``url``.

    Example::

        url = "/eccs/api/eccsresults?idp=https://idp.example.org/idp/shibboleth"
        get_url_info(url, "idp") -> "https://idp.example.org/idp/shibboleth"

    Args:
        url: Request URL, with or without a query string.
        parameter: Exact parameter name, e.g. 'idp', 'reg_auth' or 'status'.

    Returns:
        The value of the first ``parameter=value`` pair, exactly as written
        in the URL (no percent-decoding), or ``None`` when the URL has no
        query string or the parameter is absent.
    """
    # partition() never raises: a URL without '?' simply has an empty query.
    _, _, query = url.partition('?')
    for info in query.split('&'):
        # Split on the first '=' only, so values containing '=' stay intact,
        # and compare the full name so that 'idp' does not match a
        # 'reg_auth=.../idp' pair.
        name, separator, value = info.partition('=')
        if separator and name == parameter:
            return value
    return None


def parse_log_line(line):
    """Turn one log line into a dict keyed by ``LOG_FIELDS``.

    Args:
        line: A raw line read from the log file.

    Returns:
        A dict with the keys 'ip', 'date' (formatted as YYYY-MM-DD),
        'http-method' and 'url', or ``None`` when the line has no "|",
        has fewer than four fields, or carries an unparsable date.
    """
    if "|" not in line:
        return None

    details = [part.strip() for part in line.split("|")]
    if len(details) < len(LOG_FIELDS):
        logging.warning('Skipping line with too few fields: %r', line)
        return None

    try:
        details[1] = datetime.strptime(details[1], LOG_DATE_FORMAT).strftime(DAY_FORMAT)
    except ValueError:
        logging.warning('Skipping line with unparsable date: %r', line)
        return None

    # zip() stops at the shortest input, so extra trailing fields are ignored.
    return dict(zip(LOG_FIELDS, details))


def compute_stats(entries):
    """Aggregate parsed log entries into per-day statistics.

    Args:
        entries: Iterable of dicts as returned by ``parse_log_line``.

    Returns:
        A dict mapping each day (YYYY-MM-DD) to its own independent copy of
        ``eccs_json_format`` with the top-level counters filled in.  The
        "unique" counters are computed per day.
    """
    eccs_stats = {}
    seen_by_date = {}  # day -> sets of values already counted for that day

    for entry in entries:
        date = entry['date']
        url = entry['url']

        if date not in eccs_stats:
            # Each day needs its OWN copy: sharing the template would make
            # every day (and the template itself) show the same counters.
            eccs_stats[date] = copy.deepcopy(eccs_json_format)
            seen_by_date[date] = {'ip': set(), 'idp': set(), 'reg_auth': set()}
        day_stats = eccs_stats[date]
        seen = seen_by_date[date]

        # IPUnique
        if entry['ip'] not in seen['ip']:
            seen['ip'].add(entry['ip'])
            day_stats['IPUnique'] += 1

        # IdPUnique
        idp = get_url_info(url, 'idp')
        if idp is not None and idp not in seen['idp']:
            seen['idp'].add(idp)
            day_stats['IdPUnique'] += 1

        # RequestAPI / RequestWEB
        if '/eccs/api/' in url:
            day_stats['RequestAPI'] += 1
        else:
            day_stats['RequestWEB'] += 1

        # RegistrarUnique
        reg_auth = get_url_info(url, 'reg_auth')
        if reg_auth is not None and reg_auth not in seen['reg_auth']:
            seen['reg_auth'].add(reg_auth)
            day_stats['RegistrarUnique'] += 1

        # Status: only the values declared in the template are counted
        # (OK, ERROR, UNKNOWN, DISABLED); anything else is ignored.
        status = get_url_info(url, 'status')
        if status in day_stats['Status']:
            day_stats['Status'][status] += 1

    return eccs_stats


def main(file_name="/home/eccs/logs/eccs-uwsgi.log",
         log_file='/home/eccs/logs/eccs-log-parsing.log'):
    """Parse the ECCS uWSGI log and print the per-day statistics.

    Args:
        file_name: Path of the uWSGI log to analyse.
        log_file: Path of the file receiving this script's own log messages.
    """
    logging.basicConfig(filename=log_file, level=logging.INFO)
    logging.info('Started')

    # The context manager guarantees the log file is closed, even on error.
    with open(file_name, "r") as file:
        data = [entry for entry in map(parse_log_line, file) if entry is not None]

    eccs_stats = compute_stats(data)
    print(eccs_stats)

    logging.info('Finished')


if __name__ == '__main__':
    main()