Skip to content
Snippets Groups Projects
Commit 542a7fc4 authored by Marco Malavolti's avatar Marco Malavolti
Browse files

improved Python3 Log parser

parent 3d015b07
No related branches found
No related tags found
No related merge requests found
parse-logs.py 100644 → 100755
#!/bin/env python3
import logging
import json
from datetime import datetime
'''
date="2023-08-07"
example_dict = { "ciao":0,
...
}
json = { date:example_dict }
'''
# Template of the statistics structure produced by this parser.
# NOTE(review): the registrar URLs, IP addresses and hours used as keys of
# the nested sections appear to be placeholder examples - confirm.
eccs_json_format = {
    "IPUnique": 0,  # every IP address found in the logs
    "IdPUnique": 0,  # requests carrying the "idp=" parameter (API or WEB)
    "RequestAPI": 0,  # requests with "api" in the URL
    "RequestWEB": 0,  # requests without "api" in the URL
    "RegistrarUnique": 0,  # requests carrying the "reg_auth=" parameter (API or WEB)
    # Requests carrying the "status=" parameter (API or WEB), one counter
    # per value. Open question kept from the author: why do we need to
    # collect these values for the 'status' parameter?
    "Status": dict.fromkeys(("OK", "ERROR", "UNKNOWN", "DISABLED"), 0),
    # Per registrar (reg_auth): useful to understand who uses the API/WEB.
    "CountByRegistrar": {
        registrar: {
            "IPUnique": 0,
            "IdPUnique": 0,
            "RequestAPI": 0,
            "RequestWEB": 0,
            "Status": dict.fromkeys(("OK", "ERROR", "UNKNOWN", "DISABLED"), 0),
        }
        for registrar in ("http://www.idem.garr.it/", "http://www.srce.hr")
    },
    # Per client IP: useful to understand who uses the API/WEB.
    "CountByIPFromAPI": {
        address: {
            "IdPUnique": 0,
            "RegistrarUnique": 0,
            "Status": dict.fromkeys(("OK", "ERROR", "UNKNOWN", "DISABLED"), 0),
        }
        for address in ("4.4.4.4", "8.8.8.8")
    },
    # Per hour of the day: useful to understand when the federations'
    # automatic scripts run.
    "countByHour": {
        hour: {
            "IPUnique": 0,
            "IdPUnique": 0,
            "RequestAPI": 0,
            "RequestWEB": 0,
            "Status": dict.fromkeys(("OK", "ERROR", "UNKNOWN", "DISABLED"), 0),
        }
        for hour in ("17", "23")
    },
}
def get_url_info(url, parameter):
    """Return the value of one query-string parameter of a request URL.

    Example URL:
        /eccs/api/eccsresults?idp=https://garr-idp-prod.irccs.garr.it/idp/shibboleth

    Args:
        url: Request path, optionally followed by '?' and a list of
            '&'-separated 'name=value' pairs.
        parameter: Exact name of the parameter to look up, e.g. 'idp',
            'reg_auth' or 'status'.

    Returns:
        The raw (not URL-decoded) value of the first parameter whose name
        equals ``parameter``; an empty string if the name is present
        without a value; None if the URL has no query string or the
        parameter is absent.
    """
    # Cut at the first '?' only: values such as an entityID may themselves
    # contain '?', and a URL without a query string must not raise.
    _, has_query, query = url.partition('?')
    if not has_query:
        return None
    for pair in query.split('&'):
        # Split at the first '=' only so that values containing '=' stay
        # whole, and compare the name exactly: a substring test would let
        # 'idp' match 'reg_auth=https://idp.example.org'.
        name, _, value = pair.partition('=')
        if name == parameter:
            return value
    return None
# Parse the uWSGI log, print every parsed entry and print the aggregated
# statistics, logging 'Started'/'Finished' to the parser's own log file.
def main():
logging.basicConfig(filename='/home/eccs/logs/eccs-log-parsing.log', level=logging.INFO)
logging.info('Started')
file_name = "/home/eccs/logs/eccs-uwsgi.log"
#file_name = "/home/eccs/logs/example.log"
# NOTE(review): no close() for this handle is visible in this view; a
# `with open(...)` block would release it deterministically.
file = open(file_name, "r")
data = []
# Field names paired positionally with the values of `details` below.
order = ["ip", "date", "http-method", "url"]
# Values already seen; used below to count each one only once.
list_ip = []
list_idp = []
list_reg_auth = []
# NOTE(review): list_status is not referenced in the visible part of main().
list_status = []
eccs_stats = {}
for line in file.readlines():
# Lines without a '|' are skipped.
if ("|" not in line): continue
# NOTE(review): the diff-hunk marker below hides lines of this function in
# this view (presumably where `details` is built); left untouched.
......@@ -19,9 +134,51 @@ def main():
# Reduce the log timestamp to a plain YYYY-MM-DD date string.
details[1] = datetime.strptime(details[1], '[%a %b %d %H:%M:%S %Y]').strftime('%Y-%m-%d')
structure = {key:value for key, value in zip(order, details)}
data.append(structure)
for entry in data:
print(json.dumps(entry, indent = 4))
# NOTE(review): this binds the module-level eccs_json_format object itself
# (no copy) to the date, again on every entry. All dates therefore share one
# dict and the template's own counters are incremented. A per-date deep
# copy, created only the first time a date is seen, looks intended - confirm.
eccs_stats[entry['date']] = eccs_json_format
# IPUnique - global
# list_ip is shared by all entries, so an IP is counted once for the whole
# log rather than once per date.
if (entry['ip'] not in list_ip):
list_ip.append(entry['ip'])
eccs_stats[entry['date']]['IPUnique']+=1
# IdPUnique - global
if ('idp=' in entry['url']):
idp = get_url_info(entry['url'], 'idp')
if (idp not in list_idp):
list_idp.append(idp)
eccs_stats[entry['date']]['IdPUnique']+=1
# RequestAPI - global
# Anything that is not under /eccs/api/ is counted as a WEB request.
if ('/eccs/api/' in entry['url']): eccs_stats[entry['date']]['RequestAPI']+=1
else: eccs_stats[entry['date']]['RequestWEB']+=1
# RegistrarUnique - global
if ('reg_auth=' in entry['url']):
reg_auth = get_url_info(entry['url'], 'reg_auth')
if (reg_auth not in list_reg_auth):
list_reg_auth.append(reg_auth)
eccs_stats[entry['date']]['RegistrarUnique']+=1
# Status - global
# NOTE(review): unlike the 'idp=' and 'reg_auth=' checks, this tests for the
# bare substring 'status' (no '='), so it also matches URLs that merely
# contain that word.
if ('status' in entry['url']):
status = get_url_info(entry['url'], 'status')
if (status == 'OK'): eccs_stats[entry['date']]['Status']['OK']+=1
if (status == 'ERROR'): eccs_stats[entry['date']]['Status']['ERROR']+=1
if (status == 'UNKNOWN'): eccs_stats[entry['date']]['Status']['UNKNOWN']+=1
# NOTE(review): the template's Status dict defines 'DISABLED', not
# 'DISABLE': this += raises KeyError when status == 'DISABLE', and a
# status value of 'DISABLED' is never counted.
if (status == 'DISABLE'): eccs_stats[entry['date']]['Status']['DISABLE']+=1
#print(json.dumps(entry, indent = 4))
#eccs_json_format['IPUnique'] = len(list_ip)
#eccs_json_format['IdPUnique'] = len(list_idp)
#eccs_json_format['RegistrarUnique'] = len(list_reg_auth)
print(eccs_stats)
logging.info('Finished')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment