2020-02-10 10:03:58 +05:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
2020-02-15 22:36:26 -05:00
# Apache Regex portion original credits to: https://leancrew.com/all-this/2013/07/parsing-my-apache-logs/
__author__ = " Michael Ramsey "
__version__ = " 0.1.0 "
__license__ = " GPL-3.0 "
2020-02-10 10:03:58 +05:00
import os
import re
import sys
2020-02-14 21:24:53 +05:00
import time
2020-02-10 10:03:58 +05:00
from collections import Counter
2020-02-14 21:24:53 +05:00
from datetime import date , timedelta
from datetime import datetime
from os . path import join , isfile
2020-02-10 10:03:58 +05:00
# print('version is', sys.version)
2020-02-14 21:24:53 +05:00
def main ( ) :
script = sys . argv [ 0 ]
# filename = sys.argv[2]
# filenametest = "/home/example.com.access_log"
# username = 'server'
username = str ( sys . argv [ 1 ] )
2020-02-15 22:36:26 -05:00
# Define the day of interest in the Apache common log format. Default if not specified
2020-02-10 10:03:58 +05:00
try :
2020-02-14 21:24:53 +05:00
daysago = int ( sys . argv [ 2 ] )
2020-02-15 22:36:26 -05:00
# daysago = 0
2020-02-10 10:03:58 +05:00
except :
2020-02-14 21:24:53 +05:00
daysago = 0
the_day = date . today ( ) - timedelta ( daysago )
apache_day = the_day . strftime ( ' [ %d / % b/ % Y: ' )
dcpumon_day = the_day . strftime ( ' % Y/ % b/ %d ' )
# Set variables to empty
controlpanel = ' '
domlogs_path = ' '
2020-02-10 10:03:58 +05:00
try :
2020-02-14 21:24:53 +05:00
if os . path . isfile ( ' /usr/local/cpanel/cpanel ' ) | os . path . isfile ( os . getcwd ( ) + ' /cpanel ' ) :
2020-02-15 22:36:26 -05:00
controlpanel = ' Cpanel '
2020-02-14 21:24:53 +05:00
datetime_dcpumon = date . today ( ) . strftime ( ' % Y/ % b/ %d ' ) # 2020/Feb/10
# Current Dcpumon file
dcpumon_current_log = " /var/log/dcpumon/ " + datetime_dcpumon # /var/log/dcpumon/2019/Feb/15
acesslog_sed = " -ssl_log "
if username == ' server ' :
domlogs_path = ' /usr/local/apache/domlogs/ '
else :
user_homedir = " /home/ " + username
user_accesslogs = " /home/ " + username + " /logs/ "
domlogs_path = " /usr/local/apache/domlogs/ " + username
elif os . path . isfile ( ' /usr/bin/cyberpanel ' ) | os . path . isfile ( os . getcwd ( ) + ' /cyberpanel ' ) :
2020-02-15 22:36:26 -05:00
controlpanel = ' CyberPanel '
2020-02-14 21:24:53 +05:00
acesslog_sed = " .access_log "
if username == ' server ' :
# Needs updated to glob all /home/*/logs/
domlogs_path = ' /home/username/Desktop/domlogs '
else :
# Get users homedir path
user_homedir = os . path . expanduser ( " ~ " + username )
domlogs_path = user_homedir + " /logs/ "
2020-02-10 10:03:58 +05:00
except :
controlpanel = ' Control Panel not found '
2020-02-14 21:24:53 +05:00
# Define Output file
stats_output = open ( os . getcwd ( ) + ' /stats.txt ' , " w " )
2020-02-10 10:03:58 +05:00
2020-02-14 21:24:53 +05:00
# Define log path directory
path = domlogs_path
2020-02-10 10:03:58 +05:00
2020-02-14 21:24:53 +05:00
# path = "/home/username/Desktop/domlogs"
# Get list of dir contents
logs_path_contents = os . listdir ( path )
# Get list of files only from this directory
logs = filter ( lambda f : isfile ( join ( path , f ) ) , logs_path_contents )
2020-02-10 10:03:58 +05:00
# Regex for the Apache common log format.
parts = [ # host %h :ip/hostname of the client 172.68.142.138
# indent %l (unused) :client identity via client's identd configuration -
# user %u :HTTP authenticated user ID -
# time %t :timestamp [09/Mar/2019:00:38:03 -0600]
# request "%r" :request method of request, resource requested, & protocol "POST /wp-login.php HTTP/1.1"
# status %>s :Apache status code 404
# size %b (careful,can be'-'):size of request in bytes, excluding headers 3767
# referrer "%{Referer}i" :Referer "https://www.google.com/"
# user agent "%{User-agent}i":User-Agent "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0"
r ' (?P<host> \ S+) ' ,
r ' \ S+ ' ,
r ' (?P<user> \ S+) ' ,
r ' \ [(?P<time>.+) \ ] ' ,
r ' " (?P<request>.*) " ' ,
r ' (?P<status>[0-9]+) ' ,
r ' (?P<size> \ S+) ' ,
r ' " (?P<referrer>.*) " ' ,
r ' " (?P<agent>.*) " ' ,
]
pattern = re . compile ( r ' \ s+ ' . join ( parts ) + r ' \ s* \ Z ' )
# Regex for a feed request.
feed = re . compile ( r ' /all-this/( \ d \ d \ d \ d/ \ d \ d/[^/]+/)?feed/(atom/)? ' )
# Regexes for internal and Google search referrers.
internal = re . compile ( r ' https?://(www \ .)?example \ .com.* ' )
google = re . compile ( r ' https?://(www \ .)?google \ ..* ' )
# Regexes for Uptime Monitoring Robots
uptimeroboturl = re . compile ( r ' https?://(www \ .)?uptimerobot \ ..* ' )
uptimerobot = re . compile ( r ' UptimeRobot ' )
# Change Apache log items into Python types.
def pythonized ( d ) :
# Clean up the request.
d [ ' request ' ] = d [ ' request ' ] . split ( ) [ 1 ]
# Some dashes become None.
for k in ( ' user ' , ' referrer ' , ' agent ' ) :
if d [ k ] == ' - ' :
d [ k ] = None
# The size dash becomes 0.
if d [ ' size ' ] == ' - ' :
d [ ' size ' ] = 0
else :
d [ ' size ' ] = int ( d [ ' size ' ] )
# Convert the timestamp into a datetime object. Accept the server's time zone.
( time , zone ) = d [ ' time ' ] . split ( )
d [ ' time ' ] = datetime . strptime ( time , ' %d / % b/ % Y: % H: % M: % S ' )
return d
# Is this hit a page?
def ispage ( hit ) :
# Failures and redirects.
hit [ ' status ' ] = int ( hit [ ' status ' ] )
if hit [ ' status ' ] < 200 or hit [ ' status ' ] > = 300 :
return False
# Feed requests.
if feed . search ( hit [ ' request ' ] ) :
return False
# Requests that aren't GET.
# if (hit['request'])[0:3] != 'GET':
# return False
# Images, sounds, etc.
if hit [ ' request ' ] . split ( ) [ 1 ] [ - 1 ] != ' / ' :
return False
# Requests that aren't Head type. AKA uptime monitoring
if ( hit [ ' request ' ] ) [ 0 : 3 ] == ' HEAD ' :
return False
# Must be a page.
return True
# Is the referrer interesting? Internal and Google referrers are not.
def goodref ( hit ) :
if hit [ ' referrer ' ] :
return not ( google . search ( hit [ ' referrer ' ] )
or internal . search ( hit [ ' referrer ' ] ) )
else :
return False
# Is the user agent interesting? An uptime monitoring robot is not.
def goodagent ( hit ) :
if hit [ ' agent ' ] :
return not ( uptimerobot . search ( hit [ ' agent ' ] )
or uptimeroboturl . search ( hit [ ' agent ' ] ) )
else :
return False
2020-02-14 21:24:53 +05:00
# create a function which returns the value of a dictionary
def keyfunction ( k ) :
return d [ k ]
2020-02-10 10:03:58 +05:00
2020-02-14 21:24:53 +05:00
# Initialize pages for top IP's
2020-02-10 10:03:58 +05:00
pages = [ ]
2020-02-14 21:24:53 +05:00
# Initialize dictionaries for hit counters
wp_login_dict = { }
wp_cron_dict = { }
wp_xmlrpc_dict = { }
wp_admin_ajax_dict = { }
2020-02-10 10:03:58 +05:00
# Parse all the lines associated with the day of interest.
2020-02-14 21:24:53 +05:00
for log in logs :
file = os . path . join ( path , log )
text = open ( file , " r " )
wp_login_hit_count = 0
wp_cron_hit_count = 0
wp_xmlrpc_hit_count = 0
wp_admin_ajax_hit_count = 0
for line in text :
if apache_day in line :
if re . match ( " (.*)(wp-login.php)(.*) " , line ) :
wp_login_hit_count = wp_login_hit_count + 1
if re . match ( " (.*)(wp-cron.php)(.*) " , line ) :
wp_cron_hit_count = wp_cron_hit_count + 1
if re . match ( " (.*)(xmlrpc.php)(.*) " , line ) :
wp_xmlrpc_hit_count = wp_xmlrpc_hit_count + 1
if re . match ( " (.*)(admin-ajax.php)(.*) " , line ) :
wp_admin_ajax_hit_count = wp_admin_ajax_hit_count + 1
m = pattern . match ( line )
hit = m . groupdict ( )
if ispage ( hit ) :
pages . append ( pythonized ( hit ) )
else :
continue
# print >> stats_output, log + "|" + line,
# print(log + "|" + line, end="", file=stats_output)
2020-02-15 22:36:26 -05:00
# print(wp_login_hit_count)
2020-02-14 21:24:53 +05:00
log = log . replace ( ' -ssl_log ' , ' ' , 1 )
log = log . replace ( ' .access_log ' , ' ' , 1 )
2020-02-15 22:36:26 -05:00
# wp_login_dict[log] = int(wp_login_hit_count)
# wp_cron_dict[log] = int(wp_cron_hit_count)
# wp_xmlrpc_dict[log] = int(wp_xmlrpc_hit_count)
# wp_admin_ajax_dict[log] = int(wp_admin_ajax_hit_count)
# Only add hit count to dictionary if not equal to '0'
if wp_login_hit_count != ' 0 ' :
wp_login_dict [ log ] = int ( wp_login_hit_count )
if wp_cron_hit_count != ' 0 ' :
wp_cron_dict [ log ] = int ( wp_cron_hit_count )
if wp_xmlrpc_hit_count != ' 0 ' :
wp_xmlrpc_dict [ log ] = int ( wp_xmlrpc_hit_count )
if wp_admin_ajax_hit_count != ' 0 ' :
wp_admin_ajax_dict [ log ] = int ( wp_admin_ajax_hit_count )
2020-02-14 21:24:53 +05:00
# print(log)
# print("Wordpress Logins => " + str(wp_login_hit_count))
# print("Wordpress wp-cron => " + str(wp_cron_hit_count))
# print("Wordpress xmlrpc => " + str(wp_xmlrpc_hit_count))
# print("Wordpress admin-ajax => " + str(wp_admin_ajax_hit_count))
# print("===============================================================")
text . close ( )
# print(pages, file=stats_output)
print ( ' ' )
print ( ' ============================================ ' )
print ( ' Snapshot for ' + username )
print ( time . strftime ( ' % H: % M % p % Z on % b %d , % Y ' ) )
2020-02-15 22:36:26 -05:00
if controlpanel == ' Cpanel ' or controlpanel == ' CyberPanel ' :
2020-02-14 21:24:53 +05:00
print ( controlpanel + " detected " )
else :
print ( ' No control Panel detected ' )
print ( ' Accesslog path used: ' + path )
# print(dcpumon_current_log)
2020-02-10 10:03:58 +05:00
# Show the top five pages and the total.
2020-02-14 21:24:53 +05:00
print ( '''
Show top 10 pages % s ''' % the_day.strftime( ' % b %d , % Y ' ))
2020-02-10 10:03:58 +05:00
pageviews = Counter ( x [ ' request ' ] for x in pages if goodagent ( x ) )
pagestop10 = pageviews . most_common ( 10 )
for p in pagestop10 :
2020-02-14 21:24:53 +05:00
print ( ' %5d %s ' % p [ : : - 1 ] )
print ( ' %5d total ' % len ( pages ) )
print ( ' ============================================ ' )
2020-02-10 10:03:58 +05:00
# Show the top five referrers.
2020-02-14 21:24:53 +05:00
print ( '''
Show top 10 referrers % s ''' % the_day.strftime( ' % b %d , % Y ' ))
2020-02-10 10:03:58 +05:00
referrers = Counter ( x [ ' referrer ' ] for x in pages if goodref ( x ) )
referrerstop10 = referrers . most_common ( 10 )
for r in referrerstop10 :
2020-02-14 21:24:53 +05:00
print ( ' %5d %s ' % r [ : : - 1 ] )
print ( ' %5d total ' % sum ( referrers . values ( ) ) )
print ( ' ============================================ ' )
2020-02-10 10:03:58 +05:00
# Show the top 10 IPs.
2020-02-14 21:24:53 +05:00
print ( '''
Show Top 10 IPs % s ''' % the_day.strftime( ' % b %d , % Y ' ))
2020-02-10 10:03:58 +05:00
iphits = Counter ( x [ ' host ' ] for x in pages if goodagent ( x ) )
iptop10 = iphits . most_common ( 10 )
for p in iptop10 :
2020-02-14 21:24:53 +05:00
print ( ' %5d %s ' % p [ : : - 1 ] )
print ( ' %5d total hits ' % sum ( iphits . values ( ) ) )
print ( ' ============================================ ' )
2020-02-10 10:03:58 +05:00
# CMS Checks
2020-02-14 21:24:53 +05:00
print ( ' ' )
print ( ' CMS Checks ' )
print ( ' ' )
print ( ' Wordpress Checks ' )
print ( ' ============================================ ' )
d = wp_login_dict
2020-02-15 22:36:26 -05:00
# Using dictionary comprehension to find list
# keys having value in 0 will be removed from results
delete = [ key for key in d if d [ key ] == 0 ]
# delete the key
for key in delete : del d [ key ]
2020-02-14 21:24:53 +05:00
# print(d)
2020-02-15 22:36:26 -05:00
2020-02-14 21:24:53 +05:00
print ( ''' Wordpress Bruteforce Logins for wp-login.php %s ''' % the_day . strftime ( ' % b %d , % Y ' ) )
print ( ' ' )
# sort by dictionary by the values and print top 10 {key, value} pairs
for key in sorted ( d , key = keyfunction , reverse = True ) [ : 10 ] :
print ( ' %5d %s ' % ( d [ key ] , key ) )
print ( ' %5d total hits ' % sum ( dict . values ( d ) ) )
print ( ' ' )
d = wp_cron_dict
2020-02-15 22:36:26 -05:00
# Using dictionary comprehension to find list
# keys having value in 0 will be removed from results
delete = [ key for key in d if d [ key ] == 0 ]
# delete the key
for key in delete : del d [ key ]
2020-02-14 21:24:53 +05:00
print ( ''' Wordpress Cron wp-cron.php(virtual cron) checks for %s ''' % the_day . strftime ( ' % b %d , % Y ' ) )
print ( ' ' )
# sort by dictionary by the values and print top 10 {key, value} pairs
for key in sorted ( d , key = keyfunction , reverse = True ) [ : 10 ] :
print ( ' %5d %s ' % ( d [ key ] , key ) )
print ( ' %5d total hits ' % sum ( dict . values ( d ) ) )
print ( ' ' )
d = wp_xmlrpc_dict
2020-02-15 22:36:26 -05:00
# Using dictionary comprehension to find list
# keys having value in 0 will be removed from results
delete = [ key for key in d if d [ key ] == 0 ]
# delete the key
for key in delete : del d [ key ]
2020-02-14 21:24:53 +05:00
print ( ''' Wordpress XMLRPC Attacks checks for xmlrpc.php for %s ''' % the_day . strftime ( ' % b %d , % Y ' ) )
print ( ' ' )
# sort by dictionary by the values and print top 10 {key, value} pairs
for key in sorted ( d , key = keyfunction , reverse = True ) [ : 10 ] :
print ( ' %5d %s ' % ( d [ key ] , key ) )
print ( ' %5d total hits ' % sum ( dict . values ( d ) ) )
print ( ' ' )
d = wp_admin_ajax_dict
2020-02-15 22:36:26 -05:00
# Using dictionary comprehension to find list
# keys having value in 0 will be removed from results
delete = [ key for key in d if d [ key ] == 0 ]
# delete the key
for key in delete : del d [ key ]
2020-02-14 21:24:53 +05:00
print ( ''' Wordpress Heartbeat API checks for admin-ajax.php for %s ''' % the_day . strftime ( ' % b %d , % Y ' ) )
print ( ' ' )
# sort by dictionary by the values and print top 10 {key, value} pairs
for key in sorted ( d , key = keyfunction , reverse = True ) [ : 10 ] :
print ( ' %5d %s ' % ( d [ key ] , key ) )
print ( ' %5d total hits ' % sum ( dict . values ( d ) ) )
print ( ' ============================================ ' )
2020-02-10 10:03:58 +05:00
if __name__ == ' __main__ ' :
2020-02-15 22:36:26 -05:00
main ( )