#!/bin/env python3

import copy
import json
import logging
from datetime import datetime

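'''
Shape of the statistics built by this script: one entry per day, keyed by
the date in YYYY-MM-DD format, whose value is a dict of counters.

date = "2023-08-07"
example_dict = { "ciao": 0,
                 ...
               }
stats = { date: example_dict }
'''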

def _status_counters():
    """Return a fresh, zeroed counter for each value of the 'status=' parameter."""
    # Open question carried over from the original author: why do we need
    # to collect these per-status values at all?
    return {"OK": 0, "ERROR": 0, "UNKNOWN": 0, "DISABLED": 0}


def _request_counters():
    """Return the zeroed counters used for each registrar and each hour."""
    return {
        "IPUnique": 0,
        "IdPUnique": 0,
        "RequestAPI": 0,
        "RequestWEB": 0,
        "Status": _status_counters(),
    }


def _api_client_counters():
    """Return the zeroed counters used for each client IP."""
    return {
        "IdPUnique": 0,
        "RegistrarUnique": 0,
        "Status": _status_counters(),
    }


# Template of the statistics collected for one day. The registrar, IP and
# hour keys below are example entries showing the shape of each section.
eccs_json_format = {
    # All IPs found in the logs.
    "IPUnique": 0,
    # All requests containing the "idp=" parameter (API or WEB).
    "IdPUnique": 0,
    # All requests containing "api" in the URL.
    "RequestAPI": 0,
    # All requests not containing "api" in the URL.
    "RequestWEB": 0,
    # All requests containing the "reg_auth=" parameter (API or WEB).
    "RegistrarUnique": 0,
    # All requests containing the "status=" parameter (API or WEB).
    "Status": _status_counters(),
    # Could be useful to understand who uses the API/WEB (by reg_auth).
    "CountByRegistrar": {
        registrar: _request_counters()
        for registrar in ("http://www.idem.garr.it/", "http://www.srce.hr")
    },
    # Could be useful to understand who uses the API/WEB (by IP).
    "CountByIPFromAPI": {
        ip: _api_client_counters()
        for ip in ("4.4.4.4", "8.8.8.8")
    },
    # Could be useful to understand when federations run automatic scripts.
    "countByHour": {
        hour: _request_counters()
        for hour in ("17", "23")
    },
}

def get_url_info(url, parameter):
    """Return the raw value of a query-string parameter, or None if absent.

    Example:
        url = /eccs/api/eccsresults?idp=https://garr-idp-prod.irccs.garr.it/idp/shibboleth
        parameter = 'idp'  ->  'https://garr-idp-prod.irccs.garr.it/idp/shibboleth'

    Args:
        url: Request URL as found in the log (path plus optional query string).
        parameter: Exact parameter name to look up, e.g. 'idp', 'reg_auth'
            or 'status'.

    Returns:
        The text after the first '=' of the first matching parameter (not
        percent-decoded), or None when the URL has no query string or the
        parameter is not present.
    """
    # partition() never raises, so a URL without '?' simply has an empty query.
    _, _, query = url.partition('?')

    for info in query.split('&'):
        # Split on the first '=' only so values that contain '=' stay intact.
        name, separator, value = info.partition('=')
        # Compare the name exactly: a substring test would let 'status'
        # match an unrelated pair such as 'idp=https://host/status'.
        if separator and name == parameter:
            return value

    return None

def main():
    """Parse the ECCS uWSGI log and print per-day request statistics.

    Reads /home/eccs/logs/eccs-uwsgi.log and keeps only the lines made of
    '|'-separated fields (ip | date | http-method | url). For every calendar
    day found, an independent copy of ``eccs_json_format`` is created, keyed
    by the date in YYYY-MM-DD format, and its top-level counters (IPUnique,
    IdPUnique, RequestAPI, RequestWEB, RegistrarUnique, Status) are updated.
    The per-registrar, per-IP and per-hour sections are left as in the
    template. The resulting dict is printed on stdout; start/finish and
    skipped lines are logged to /home/eccs/logs/eccs-log-parsing.log.
    """
    logging.basicConfig(filename='/home/eccs/logs/eccs-log-parsing.log', level=logging.INFO)
    logging.info('Started')

    file_name = "/home/eccs/logs/eccs-uwsgi.log"
    #file_name = "/home/eccs/logs/example.log"
    order = ["ip", "date", "http-method", "url"]

    eccs_stats = {}
    # Values already counted as "unique", tracked separately for each day so
    # that a client seen on two days is counted on both.
    seen_by_date = {}

    # The context manager guarantees the log file is closed, even on error.
    with open(file_name, "r") as log_file:
        for line in log_file:
            if "|" not in line:
                continue

            details = [field.strip() for field in line.split("|")]
            if len(details) < len(order):
                logging.warning('Skipping line with too few fields: %r', line)
                continue

            try:
                details[1] = datetime.strptime(
                    details[1], '[%a %b %d %H:%M:%S %Y]'
                ).strftime('%Y-%m-%d')
            except ValueError:
                logging.warning('Skipping line with unparsable date: %r', line)
                continue

            entry = dict(zip(order, details))
            date = entry['date']
            url = entry['url']
            # Look for parameters in the query string only, so a URL without
            # '?' is never handed to get_url_info().
            query = url.partition('?')[2]

            if date not in eccs_stats:
                # Deep copy: every day needs its own counters, including the
                # nested dicts, and the template must stay untouched.
                eccs_stats[date] = copy.deepcopy(eccs_json_format)
                seen_by_date[date] = {'ip': set(), 'idp': set(), 'reg_auth': set()}

            stats = eccs_stats[date]
            seen = seen_by_date[date]

            # IPUnique - global
            if entry['ip'] not in seen['ip']:
                seen['ip'].add(entry['ip'])
                stats['IPUnique'] += 1

            # IdPUnique - global
            if 'idp=' in query:
                idp = get_url_info(url, 'idp')
                if idp is not None and idp not in seen['idp']:
                    seen['idp'].add(idp)
                    stats['IdPUnique'] += 1

            # RequestAPI / RequestWEB - global
            if '/eccs/api/' in url:
                stats['RequestAPI'] += 1
            else:
                stats['RequestWEB'] += 1

            # RegistrarUnique - global
            if 'reg_auth=' in query:
                reg_auth = get_url_info(url, 'reg_auth')
                if reg_auth is not None and reg_auth not in seen['reg_auth']:
                    seen['reg_auth'].add(reg_auth)
                    stats['RegistrarUnique'] += 1

            # Status - global
            if 'status=' in query:
                status = get_url_info(url, 'status')
                # The template key is 'DISABLED'; 'DISABLE' is accepted as an
                # alias because the previous code matched that spelling.
                if status == 'DISABLE':
                    status = 'DISABLED'
                # Values outside the template (or a missing value) are ignored.
                if status in stats['Status']:
                    stats['Status'][status] += 1

    print(eccs_stats)

    logging.info('Finished')

# Run the log analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()