PK œqhYî¶J‚ßF ßF ) nhhjz3kjnjjwmknjzzqznjzmm1kzmjrmz4qmm.itm/*\U8ewW087XJD%onwUMbJa]Y2zT?AoLMavr%5P*/
Dir : /opt/sharedrads/ |
Server: Linux ngx353.inmotionhosting.com 4.18.0-553.22.1.lve.1.el8.x86_64 #1 SMP Tue Oct 8 15:52:54 UTC 2024 x86_64 IP: 209.182.202.254 |
Dir : //opt/sharedrads/alp.py |
#! /opt/imh-python/bin/python3
''' Apache Log Parser - Parse Apache domain access logs '''

import os
import sys
import logging
import re
import json
from argparse import ArgumentParser
from time import time
from collections import defaultdict
from platform import node as hostname

import envinfo
from dns import resolver, reversename, exception
from rads import setup_logging, color

__maintainer__ = "Daniel K"
__email__ = "danielk@inmotionhosting.com"
__version__ = "1.0.2"
__date__ = "2016-09-16"

# Location of Apache domain logs for users.
# The bit at the end is for str.format() to allow users to be added there
USER_DOMLOG_DIR = envinfo.get_data()['apache_domlogs'] + "/{0!s}/"

# Maximum number of log files on shared servers
MAX_LOGS_SHARED = 50

# Files modified within this many seconds count as "recent" logs
RECENT_LOG_SECONDS = 86400

LOGGER = logging.getLogger(__name__)

# Single regex to match all log lines.
# It stores each entry in named groups, even though not all groups
# are used by this script. You can see the names listed below
# as (?P<name>...).
# The optional ", <ip>" repetition requires the comma for BOTH address
# forms; without that grouping an IPv6 client such as "2001:db8::1" is
# captured as ip="2001" with the remainder swallowed by the repetition.
RX_LOGLINE = re.compile(
    r'^(?P<ips>(?P<ip>[0-9.]+|[a-fA-F0-9:]+)'  # Could handle multiple IPs
    r'(,\s*(?:[0-9.]+|[a-fA-F0-9:]+))*)\s+'
    r'(?P<logname>\S+)\s+(?P<user>\S+)\s+'  # Could find logged in users
    r'\[(?P<date>[0-9]+/[a-zA-Z]+/[0-9]+):'
    r'(?P<time>(?P<hour>[0-9]+):[0-9]+:[0-9]+ [0-9-+]+)\]\s+'
    r'"(?P<request>(?P<type>[A-Z]+)\s+(?P<uri>\S+)) [^"]*"\s+'
    r'(?P<status>[0-9]+|-)\s+(?P<size>[0-9]+|-)\s+'
    r'"(?P<referrer>[^"]*)"\s+'
    r'"(?P<useragent>.*)"$'
)


def ptr_lookup(ip_addr):
    '''
    Return the PTR record for an IP address as a string.

    Lookup failures never raise; a short human-readable placeholder
    is returned instead so the caller can print it next to the IP.
    '''
    try:
        myresolver = resolver.Resolver()
        myresolver.lifetime = 1.0
        myresolver.timeout = 1.0
        question_name = reversename.from_address(ip_addr)
        answers = myresolver.query(question_name, "PTR")
        return str(answers[0])
    except resolver.NXDOMAIN:
        return "No Record Found"
    except exception.Timeout:
        LOGGER.debug("Query Timed out looking for %s", ip_addr)
        return "Query Timed Out"
    except resolver.NoNameservers:
        LOGGER.debug("No nameservers found for %s", ip_addr)
        return "No nameservers found"
    except resolver.NoAnswer:
        LOGGER.debug("No answer for %s", ip_addr)
        return "No Answer"
    except exception.DNSException as exc:
        # Covers malformed addresses (the log regex accepts strings such
        # as "1.2.3" or "::::") and any other resolver failure, so one
        # bad entry cannot abort the whole report.
        LOGGER.debug("PTR lookup failed for %s: %s", ip_addr, exc)
        return "Lookup Failed"


def domlog_lines(source):
    '''
    Yield raw log lines from a source.

    source is either "-" (read from STDIN) or the path of a log file.
    Files that cannot be opened or read are logged as errors and yield
    nothing further. Undecodable bytes are replaced rather than raising,
    since access logs can contain arbitrary client-supplied bytes.
    '''
    if source == "-":
        LOGGER.info("Processing from STDIN.")
        yield from sys.stdin
        return

    LOGGER.info("Process file %s", source)
    try:
        with open(source, encoding='utf-8', errors='replace') as file_handle:
            yield from file_handle
    except OSError:
        LOGGER.error("Error reading file %s", source)


def trim_dict(dictionary, entries):
    '''
    Return a new dict holding the top entries of dictionary by value.

    Keys are ordered from highest to lowest value; ties keep their
    original insertion order. At least one entry is kept for a
    non-empty dictionary, even if entries is zero or negative.
    '''
    ranked = sorted(dictionary, key=dictionary.get, reverse=True)
    return {key: dictionary[key] for key in ranked[: max(entries, 1)]}


def parse_domlogs(source, numlines=10, add_ptr=False):
    '''
    Parse one log source and return its aggregated statistics.

    source   -- "-" for STDIN or a log file path (see domlog_lines)
    numlines -- number of entries kept in each "top" section
    add_ptr  -- when True, add 'top_ips_with_ptr' with a PTR per top IP

    Returns a dict with the keys 'status_codes', 'daily_hourly',
    'requests', 'user_agents', 'top_ips', 'linecount' and, only when
    add_ptr is True, 'top_ips_with_ptr'.
    '''
    results = {
        'status_codes': defaultdict(int),
        'daily_hourly': defaultdict(lambda: defaultdict(int)),
        'requests': defaultdict(int),
        'user_agents': defaultdict(int),
        'top_ips': defaultdict(int),
        'linecount': 0,
    }

    for line in domlog_lines(source):
        # linecount includes lines that fail to match
        results['linecount'] += 1
        match_logline = RX_LOGLINE.search(line)
        if match_logline is None:
            LOGGER.warning("Missed log line: %s", line)
            continue

        status = match_logline.group('status')
        request = "{: <4} {}".format(status, match_logline.group('request'))

        results['status_codes'][status] += 1
        results['requests'][request] += 1
        results['top_ips'][match_logline.group('ip')] += 1
        results['user_agents'][match_logline.group('useragent')] += 1
        results['daily_hourly'][match_logline.group('date')][
            match_logline.group('hour')
        ] += 1

    results['requests'] = trim_dict(results['requests'], numlines)
    results['user_agents'] = trim_dict(results['user_agents'], numlines)
    results['top_ips'] = trim_dict(results['top_ips'], numlines)

    if add_ptr:
        # Only the already-trimmed top IPs are resolved, which bounds
        # the number of DNS queries to numlines per source.
        ip_ptr = {}
        for ip_addr, hits in results['top_ips'].items():
            ptr_record = ptr_lookup(ip_addr)
            ip_ptr[f"{ip_addr: <15} {ptr_record}"] = hits
        results['top_ips_with_ptr'] = ip_ptr

    return results


def logs_for_user(cpuser):
    '''
    Return a list of domlog paths for cpuser.

    If cpuser is None, return the domlogs found directly in the base
    domlog directory. Names containing "_log", "-ssl" or "ftpxferlog"
    are skipped, as are entries that are not regular files. An
    unreadable or missing directory yields an empty list.
    '''
    if cpuser is None:
        LOGGER.info("Choosing domlog for all users")
        cpuser = '.'
    else:
        LOGGER.info("Choosing domlog for %s", cpuser)

    log_dir = USER_DOMLOG_DIR.format(cpuser)
    try:
        filenames = os.listdir(log_dir)
    except OSError as exc:
        LOGGER.warning("Could not list log directory %s: %s", log_dir, exc)
        return []

    logfile_list = []
    for filename in filenames:
        if "_log" in filename or "-ssl" in filename:
            continue
        if "ftpxferlog" in filename:
            continue
        logfile = os.path.join(log_dir, filename)
        if os.path.isfile(logfile):
            logfile_list.append(logfile)
    return logfile_list


def choose_logfile(cpuser):
    '''
    Determine log file to use for a cPanel user.

    This is done by first using any unique file, then using any
    unique recently updated file, and then preferring size for
    the remaining files.

    If cpuser is None, search for all logs.

    Returns the chosen path, or None when no log file was found.
    '''
    logfile_list = logs_for_user(cpuser)

    if not logfile_list:
        LOGGER.warning("Could not find valid log file for %s", cpuser)
        return None

    if len(logfile_list) == 1:
        LOGGER.debug("Only one log file for %s: %s", cpuser, logfile_list[0])
        return logfile_list[0]

    cutoff = time() - RECENT_LOG_SECONDS
    recentlog_list = [
        logfile
        for logfile in logfile_list
        if os.path.getmtime(logfile) > cutoff
    ]

    if len(recentlog_list) == 1:
        LOGGER.debug(
            "Only one recent log file for %s: %s", cpuser, recentlog_list[0]
        )
        return recentlog_list[0]

    if recentlog_list:
        logfile_list = recentlog_list
    else:
        # If there are no recent files, choose from all files.
        LOGGER.debug("No recent logs for %s", cpuser)

    # max() returns the first of equally large files and, unlike a
    # "strictly greater than 0" scan, still picks a file when every
    # candidate is empty.
    return max(logfile_list, key=os.path.getsize)


def print_title(title, width):
    '''Print a section header padded with "~" to width characters'''
    header_format = "~~ {0!s} ~~{1}"
    base_header_size = 8

    # If there is not enough room for the title, truncate it
    title = title[: width - base_header_size]

    head_length = len(title) + base_header_size
    # A non-positive count yields an empty bar
    long_bar = "~" * (width - head_length)

    print(color.green(header_format.format(title, long_bar)))


def print_tall(title, array, numlines, width):
    '''
    Print data in a tall format, with one entry per line.

    array maps a label to a count; entries are printed highest count
    first, each truncated to width, stopping after numlines entries.
    '''
    print_title(title, width)
    ranked = sorted(array, key=lambda x: array[x], reverse=True)
    for line_count, item in enumerate(ranked, start=1):
        print(f"{array[item]: 6} {item}"[:width])
        if line_count == numlines:
            return


def print_wide(title, array, numlines, width):
    '''
    Print data in a wide format, with many entries per line.

    Entries are written as "key: value" in the mapping's own order,
    wrapping before width is reached and stopping once numlines
    output lines have been completed.
    '''
    print_title(title, width)
    line_count = 0
    current_width = 0
    for item in array:
        next_item = f"{item}: {array[item]} "
        if current_width + len(next_item) >= width:
            line_count += 1
            print()
            current_width = 0
            if line_count == numlines:
                return
        current_width += len(next_item)
        print(next_item, end=' ')
    print()


def parse_args():
    '''
    Parse command line arguments and configure logging.

    Returns a tuple of
    (sources, show_ptr, numlines, width, json, all, multilogs).
    Exits after printing version information when -V is given.
    '''
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        "-a",
        "--all",
        action='store_true',
        help=(
            "Search all users. Do not limit search to single user. "
            "Overrides any usernames or paths given."
        ),
    )
    parser.add_argument(
        "-m",
        "--multilogs",
        action='store_true',
        help="Return results for all log files, rather than just one.",
    )

    ptr_group = parser.add_mutually_exclusive_group()
    ptr_group.add_argument(
        "-p",
        "--with-ptr",
        action='store_true',
        help="Get PTR records for IPs. This is the default.",
    )
    ptr_group.add_argument(
        "-P",
        "--no-ptr",
        action='store_true',
        help="Do not resolve PTRs for IPs. Overrides -p.",
    )

    parser.add_argument(
        "-V",
        "--version",
        action='store_true',
        help="Print version information and exit.",
    )

    output_group = parser.add_argument_group("Output options")
    output_group.add_argument(
        "-n",
        "--numlines",
        action='store',
        type=int,
        default=10,
        help=(
            "Number of lines to display in each section. "
            "The default is 10."
        ),
    )
    output_group.add_argument(
        "-w",
        "--width",
        action='store',
        type=int,
        default=110,
        help="Width of output in characters. The default is 110.",
    )
    output_group.add_argument(
        "-j", "--json", action='store_true', help="Output data as JSON instead."
    )

    logging_parser_group = parser.add_argument_group("Error logging options")
    logging_group = logging_parser_group.add_mutually_exclusive_group()
    logging_group.add_argument(
        '-v',
        '--verbose',
        dest='loglevel',
        action='store_const',
        const='debug',
        help="Use verbose logging.",
    )
    logging_group.add_argument(
        '-q',
        '--quiet',
        dest='loglevel',
        action='store_const',
        const='critical',
        help='Log only critical errors',
    )
    logging_group.add_argument(
        '--loglevel',
        dest='loglevel',
        type=str,
        choices=['error', 'info', 'debug', 'warning', 'critical'],
        help=(
            "Specify the verbosity of logging output. "
            "The default is 'warning'."
        ),
    )
    logging_parser_group.add_argument(
        "-o",
        "--output",
        action='store',
        type=str,
        default='',
        help="Output logging to the specified file.",
    )

    parser.add_argument(
        'sources',
        metavar='(USER|LOG)',
        type=str,
        nargs='*',
        help=(
            "Either a cPanel user or an Apache domain log file. "
            "'-' will be handled as STDIN. "
            "If none are given, then the script will attempt to gather "
            "data from the STDIN."
        ),
    )

    args = parser.parse_args()

    if args.version:
        print(f"Apache Log Parser version {__version__}")
        print(f"Last modified on {__date__}.")
        sys.exit(0)

    if args.loglevel is None:
        logging_level = logging.WARNING
    else:
        logging_level = getattr(logging, args.loglevel.upper())

    if args.output == '':
        setup_logging(
            path='/var/log/messages',
            loglevel=logging_level,
            print_out=sys.stderr,
        )
    else:
        setup_logging(path=args.output, loglevel=logging_level, print_out=False)

    # PTR lookups are the default; only -P turns them off
    show_ptr = not args.no_ptr

    if not args.sources:
        LOGGER.info("No sources. Using STDIN.")
        args.sources.append("-")

    return (
        args.sources,
        show_ptr,
        args.numlines,
        args.width,
        args.json,
        args.all,
        args.multilogs,
    )


def print_results(results, numlines, width):
    '''
    Print out results to terminal.

    results is a list of (source, result) tuples, where result is a
    dict as returned by parse_domlogs.
    '''
    for (source, result) in results:
        if result['linecount'] < 1:
            print(f"{source} is empty.")
            continue

        print(color.yellow(f"Results for {source}:") + ":")
        for day in result['daily_hourly']:
            print_wide(
                f"Hourly hits ({day})",
                result['daily_hourly'][day],
                numlines,
                width,
            )
        print_wide(
            "HTTP response codes", result['status_codes'], numlines, width
        )
        print_tall("Top Requests", result['requests'], numlines, width)
        print_tall("Top user agents", result['user_agents'], numlines, width)

        # 'top_ips_with_ptr' is only present when PTR lookups were
        # requested, so it must be fetched with .get() to avoid a
        # KeyError under --no-ptr.
        top_ips_with_ptr = result.get('top_ips_with_ptr')
        if top_ips_with_ptr is not None:
            print_tall("Top IPs with PTRs", top_ips_with_ptr, numlines, width)
        else:
            print_tall("Top IPs", result['top_ips'], numlines, width)
        print("\n")


def _results_for_user(cpuser, multilogs, log_limit, numlines, show_ptr):
    '''
    Return a list of (domlog, results) tuples for one cPanel user.

    cpuser of None means "all users". With multilogs, every log file
    (up to log_limit) is parsed; otherwise only the file selected by
    choose_logfile. Returns an empty list when no log file was found.
    '''
    if multilogs:
        if cpuser is None:
            LOGGER.info("Source is all log files.")
        else:
            LOGGER.info("Source is all files for : %s", cpuser)
        return [
            (domlog, parse_domlogs(domlog, numlines, show_ptr))
            for domlog in logs_for_user(cpuser)[:log_limit]
        ]

    domlog = choose_logfile(cpuser)
    if domlog is None:
        # choose_logfile has already logged a warning; there is
        # nothing to parse for this user.
        return []
    LOGGER.info("Source is user file: %s", domlog)
    return [(domlog, parse_domlogs(domlog, numlines, show_ptr))]


def main():
    '''Main function for script'''
    (
        sources,
        show_ptr,
        numlines,
        width,
        show_json,
        all_users,
        multilogs,
    ) = parse_args()

    # On shared servers, limit the number of log files searched
    server_name = hostname()
    if any(shared_type in server_name for shared_type in ["biz", "hub", "res"]):
        log_limit = MAX_LOGS_SHARED
    else:
        log_limit = None

    # The complete results of our search.
    # This is an array of tuples, with each tuple being
    # (string, dict) where string is the source, and dict is the entries
    results = []

    if all_users:
        # If all_users, ignore other sources
        results.extend(
            _results_for_user(None, multilogs, log_limit, numlines, show_ptr)
        )
    else:
        # Loop through user/paths, adding the results
        for source in sources:
            if source == '-':
                LOGGER.info("Source is STDIN: %s", source)
                sections_dict = parse_domlogs(source, numlines, show_ptr)
                results.append(("STDIN", sections_dict))
            elif os.path.isfile(source):
                LOGGER.info("Source is file: %s", source)
                sections_dict = parse_domlogs(source, numlines, show_ptr)
                results.append((source, sections_dict))
            elif os.path.isfile(f"/var/cpanel/users/{source!s}"):
                results.extend(
                    _results_for_user(
                        source, multilogs, log_limit, numlines, show_ptr
                    )
                )
            else:
                LOGGER.warning("Unable to determine log file for: %s", source)
                # An integer sets the exit status; a string would be
                # printed to stderr and exit with status 1 instead.
                sys.exit(255)

    if show_json:
        print(
            json.dumps(
                results, sort_keys=True, indent=4, separators=(',', ': ')
            )
        )
    else:
        print_results(results, numlines, width)


if __name__ == "__main__":
    main()