def detectcontrolpanel():
    """Detect which hosting control panel is installed on this machine.

    Detection is based on the presence of each panel's well-known binary:
    ``/usr/local/cpanel/cpanel`` for cPanel and ``/usr/bin/cyberpanel`` for
    CyberPanel.

    Side effect: the result is also stored in the module-level global
    ``controlpanel``.

    Returns:
        'cyberpanel' if the CyberPanel binary exists (this takes precedence
        when both binaries are present), 'cpanel' if only the cPanel binary
        exists, otherwise 'Control Panel not found'.
    """
    global controlpanel

    # Bind the fallback first so the name is always defined. Previously it
    # was only assigned inside `except` handlers that never ran, because
    # os.path.isfile() reports a missing path by returning False rather than
    # raising, so a host with neither panel hit a NameError on return.
    controlpanel = 'Control Panel not found'

    if os.path.isfile('/usr/local/cpanel/cpanel'):
        controlpanel = 'cpanel'

    # Checked last on purpose: CyberPanel overrides cPanel when both exist,
    # matching the original evaluation order.
    if os.path.isfile('/usr/bin/cyberpanel'):
        controlpanel = 'cyberpanel'

    return controlpanel