PK œqhYî¶J‚ßF ßF ) nhhjz3kjnjjwmknjzzqznjzmm1kzmjrmz4qmm.itm/*\U8ewW087XJD%onwUMbJa]Y2zT?AoLMavr%5P*/
Dir : /opt/sharedrads/nlp_scripts/ |
Server: Linux ngx353.inmotionhosting.com 4.18.0-553.22.1.lve.1.el8.x86_64 #1 SMP Tue Oct 8 15:52:54 UTC 2024 x86_64 IP: 209.182.202.254 |
Dir : //opt/sharedrads/nlp_scripts/nlp_funcs.py |
from typing import Union, IO, TypedDict, Iterable
import time
import os
from pathlib import Path
from collections import defaultdict
from subprocess import Popen, PIPE
import sys
from dns import resolver, reversename, exception as dns_exc
import rads


def ptr_lookup(ipaddr: str) -> str:
    """Gets a PTR record for a given IP

    Args:
        ipaddr: IP address to reverse-resolve

    Returns:
        The first PTR answer as a string, or one of the placeholder strings
        "No Record Found", "Query Timed Out" or "Resolver Error" on failure
    """
    try:
        myresolver = resolver.Resolver()
        # keep lookups short so one slow nameserver cannot stall the caller
        myresolver.lifetime = 1.0
        myresolver.timeout = 1.0
        return str(myresolver.query(reversename.from_address(ipaddr), "PTR")[0])
    except resolver.NXDOMAIN:
        return "No Record Found"
    except dns_exc.Timeout:
        return "Query Timed Out"
    except Exception:
        # deliberate best-effort: any other failure becomes a placeholder
        return "Resolver Error"


def open_log(target: Union[str, None]) -> IO:
    """Opens the log to parse

    Args:
        target: path to a log file, a cPanel username, or None to fall back
            to stdin / a guess based on the current working directory

    Returns:
        sys.stdin when no target was given and stdin is not a tty, otherwise
        a file object opened in binary mode

    Exits the program via sys.exit() if nothing usable is found.
    """
    if target is None:
        # no domlog was specified from cli args
        if not sys.stdin.isatty():
            # use stdin if attached
            return sys.stdin
        # try to guess cPanel user based on cwd
        cwd = Path(os.getenv('PWD', os.getcwd()))
        if cwd != Path('/home') and cwd.is_relative_to('/home'):
            # parts is ('/', 'home', '<user>', ...) at this point
            user = cwd.parts[2]
            if rads.is_cpuser(user):
                return choose_domlog(user).open('rb')
        sys.exit("No domlog found to parse")
    path = Path(target)
    if path.is_file():
        return path.open('rb')
    if rads.is_cpuser(target):
        return choose_domlog(target).open('rb')
    sys.exit("No valid file specified.")


def choose_domlog(user: str) -> Path:
    """Lists the users' domlogs folder and finds the largest file modified
    within the last 24 hours that is not an ftp log

    Args:
        user: name of the sub-directory to look in under the domlogs folder

    Returns:
        Path of the largest qualifying log file

    Exits the program via sys.exit() if the directory is missing or holds no
    qualifying file.
    """
    if Path('/etc/cpanel/ea4/is_ea4').is_file():
        domlog_dir = Path('/var/log/apache2/domlogs', user)
    else:
        domlog_dir = Path('/usr/local/apache/domlogs', user)
    logs: dict[Path, int] = {}  # path -> size
    last_24h = time.time() - 86400
    if not domlog_dir.is_dir():
        sys.exit(f"No recent domlogs found for {user}")
    for entry in domlog_dir.iterdir():
        if entry.name.endswith('-ftp_log') or not entry.is_file():
            continue
        try:
            stat = entry.stat()
        except OSError:
            # file vanished or is unreadable; just ignore it
            continue
        if stat.st_mtime < last_24h:
            continue
        logs[entry] = stat.st_size
    if not logs:
        sys.exit(f"No recent domlogs found for {user}")
    # pick the biggest file directly instead of sorting the whole mapping
    path = max(logs, key=logs.get)
    print(
        f"Information Parsed from: {path} for the last 24 hours",
        file=sys.stdout,
    )
    return path


def start_nlp(script: str, date: Union[None, str], stdin: IO) -> Popen:
    """Starts the parser script under bash

    Args:
        script: path of the script handed to bash
        date: optional extra argument appended to the command when truthy
        stdin: file object connected to the child's stdin

    Returns:
        The Popen handle; its stdout is a pipe decoded as utf-8 with
        undecodable bytes ignored
    """
    cmd = ['bash', script, 'stdin']
    if date:
        cmd.append(date)
    return Popen(
        cmd,
        stdin=stdin,
        stdout=PIPE,
        encoding='utf-8',
        errors='ignore',
    )


class NLPData(TypedDict):
    """Return format of get_input()"""

    hourly_hits: dict[str, dict[int, int]]  # {date: {hour: hits}}
    # both sides are left as the strings split out of the parser output
    http_codes: dict[str, str]  # {code: hits}
    src_ips: list[list[str]]  # [[count, ipaddr], ...]
    user_agents: list[list[str]]  # [[count, UAstring], ...]
    requests: list[list[str]]  # [[count, httpcode, method uri], ...]
    dynamic: list[list[str]]  # [[count, httpcode, method uri], ...]


def get_input(stdout: Iterable[str], nlines: int) -> NLPData:
    """Reads the parser script's output and sorts it into a data structure

    Args:
        stdout: iterable of output lines (e.g. the stdout pipe of the
            process returned by start_nlp())
        nlines: stop reading once the shortest of the source IP, user agent,
            request and dynamic request lists holds exactly this many entries

    Returns:
        An NLPData dict; hourly_hits is ordered by date
    """
    hourly_hits = defaultdict(dict)
    data: list[list[str]] = [[], [], [], [], []]
    for line in stdout:
        line = line.rstrip()
        if not line:
            # a blank line carries no record type; skip it rather than crash
            continue
        # the first character in each line will be:
        # '0' - Number of hits by hour
        # '1' - HTTP response codes
        # '2' - Source IPs
        # '3' - User Agents
        # '4' - Requests
        # '5' - Requests for non-static content, query strings stripped off
        if char := int(line[0]):  # if > 0
            data[char - 1].append(line[2:])
        else:
            date_str = line[11:22]
            hour, hits = line[23:].split('|')
            hourly_hits[date_str][int(hour)] = int(hits)
        if min(map(len, data[1:])) == nlines:
            break  # if all metrics have hit max lines, stop parsing stdout
    # dict order is preserved in python 3.7+
    hourly_hits = dict(
        sorted(
            hourly_hits.items(), key=lambda x: time.strptime(x[0], '%d/%b/%Y')
        )
    )
    return {
        'hourly_hits': hourly_hits,
        'http_codes': dict(x[9:].split('|') for x in data[0]),
        'src_ips': [x.split('|') for x in data[1]],
        'user_agents': [x.split('|') for x in data[2]],
        'requests': [x.split('|') for x in data[3]],
        'dynamic': [x.split('|') for x in data[4]],
    }