|
| 1 | +import sys |
| 2 | +import os |
| 3 | +import pandas as pd |
| 4 | +from pandas.api.types import CategoricalDtype |
| 5 | +import matplotlib.pyplot as plt |
| 6 | +from datetime import datetime |
| 7 | +import feather |
| 8 | +import math |
| 9 | +import glob |
| 10 | +import logging |
| 11 | + |
def convert_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Non-negative number of bytes (int or float).

    Returns:
        ``"0B"`` when ``size_bytes`` is zero, otherwise ``"<value> <unit>"``
        where the value is rounded to two decimals, e.g. ``"1.5 KB"``.
        Values larger than the biggest known unit are expressed in that
        unit (``YB``) instead of failing.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError("size_bytes must be non-negative, got %r" % (size_bytes,))
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    scaled = float(size_bytes)
    i = 0
    # Dividing by 1024 is exact in binary floating point, so stepping through
    # the units this way avoids the rounding pitfalls of floor(log(x, 1024))
    # and naturally keeps the index inside size_name (no negative index for
    # values below 1, no overflow past "YB").
    while scaled >= 1024 and i < len(size_name) - 1:
        scaled /= 1024
        i += 1
    s = round(scaled, 2)
    return "%s %s" % (s, size_name[i])
| 20 | + |
def extract_epoch(seq):
    """Return the integer formed by the digit characters found in ``seq``.

    Every element of ``seq`` for which ``type(seq).isdigit`` is true is kept,
    in order; the kept elements are joined with an empty instance of
    ``type(seq)`` and the result is passed to ``int``.

    Args:
        seq: Text containing digits, e.g. ``"t=1589000000"``.

    Returns:
        The digits of ``seq`` interpreted as a single base-10 integer.

    Raises:
        ValueError: If ``seq`` contains no digits (``int`` of an empty value).
    """
    kind = type(seq)
    digits_only = kind().join(ch for ch in seq if kind.isdigit(ch))
    return int(digits_only)
| 24 | + |
def convert_time(epoch_time):
    """Convert a value carrying an epoch timestamp into a ``datetime``.

    The digits of ``epoch_time`` are extracted with ``extract_epoch`` and the
    resulting integer is handed to ``datetime.fromtimestamp``, which yields a
    naive ``datetime`` in the machine's local timezone.

    Args:
        epoch_time: Text containing the digits of an epoch timestamp.

    Returns:
        The corresponding ``datetime`` object.
    """
    epoch_seconds = extract_epoch(epoch_time)
    return datetime.fromtimestamp(epoch_seconds)
| 27 | + |
def show_ftr_details(df):
    """Log the time range covered by ``df``.

    Emits, at INFO level, the maximum and minimum of ``df.local_time``, the
    difference between them, and a separator line.

    Args:
        df: DataFrame exposing a ``local_time`` column.
    """
    newest = df.local_time.max()
    oldest = df.local_time.min()
    span = newest - oldest
    logging.info('Most recent time of dataset: ' + str(newest))
    logging.info('Oldest time of dataset: ' + str(oldest))
    logging.info('Time between start and end of dataset: ' + str(span))
    logging.info("=====================================================")
| 33 | + |
| 34 | + |
def parse_audit(source_directory, output_file):
    """Parse ``audit.*.log`` files into one feather file and log a summary.

    Every file in ``source_directory`` matching ``audit.*.log`` is read as a
    ``|``-separated table with 18 positional columns. Only ``result``,
    ``user``, ``smb_operation_type``, ``local_time``, ``share`` and ``path``
    are kept. ``local_time`` is converted to datetimes with ``convert_time``
    and ``smb_operation_type`` is stored as a categorical column. The combined
    frame is written to ``output_file + ".ftr"``.

    Args:
        source_directory: Directory that contains the audit log files.
        output_file: Output path without extension; ``".ftr"`` is appended.

    Raises:
        ValueError: If no ``audit.*.log`` file exists in ``source_directory``.
    """
    # NOTE: this changes pandas display settings for the whole process.
    pd.set_option('display.max_rows', None)
    col_names = ["col0", "col1", "col2", "col3", "result", "col5", "user", "col7", "smb_operation_type", "utc_time", "local_time", "col11", "share", "path", "col14", "col15", "col16", "col17"]
    # Positions within col_names of the columns that are actually loaded.
    wanted_cols = [4, 6, 8, 10, 12, 13]

    all_files = glob.glob(os.path.join(source_directory, "audit.*.log"))
    logging.info(all_files)
    if not all_files:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" error.
        raise ValueError("No audit.*.log files found in: " + str(source_directory))

    total_size = sum(os.path.getsize(log_path) for log_path in all_files)

    def load_one(log_path):
        # Read local_time as text and convert it afterwards: read_csv's
        # date_parser argument is deprecated since pandas 2.0.
        frame = pd.read_csv(log_path, sep='|', names=col_names, index_col=False, low_memory=False, dtype={'local_time': str}, usecols=wanted_cols)
        frame['local_time'] = pd.to_datetime(frame['local_time'].map(convert_time))
        return frame

    concatenated_df = pd.concat((load_one(log_path) for log_path in all_files), copy=False)

    concatenated_df.smb_operation_type = concatenated_df.smb_operation_type.astype('category')
    concatenated_df.info()
    logging.info(concatenated_df.memory_usage(deep=True) / 1e6)

    ftr_path = output_file + ".ftr"
    logging.info('Creating feather file at: ' + ftr_path)
    # reset_index() moves the repeating per-file row index into a regular
    # "index" column so the frame has a default index when written.
    concatenated_df.reset_index().to_feather(ftr_path)

    ftr_size = os.path.getsize(ftr_path)
    logging.info("Completed parsing " + str(convert_size(total_size)) + " to " + str(convert_size(ftr_size)))
    logging.info("=====================================================")
    # Same time-range summary that the other entry points print.
    show_ftr_details(concatenated_df)
| 72 | + |
def summarize_audit(ftr_file, time_interval):
    """Log summary statistics for a feather file and plot operations over time.

    Logs the dataset's time range, the number of rows per
    ``smb_operation_type``, and the ten most frequent values of ``user``,
    ``share`` and ``path``. Then shows a line plot of row counts per
    operation type, with ``local_time`` floored to ``time_interval``.

    Args:
        ftr_file: Path of the feather file to load.
        time_interval: Frequency string passed to ``Series.dt.floor`` to
            bucket ``local_time`` (for example ``'60min'``).
    """
    separator = "====================================================="

    df = pd.read_feather(ftr_file)
    logging.info('Loading FTR file at: ' + str(ftr_file))
    logging.info(separator)
    show_ftr_details(df)

    op_totals = df['smb_operation_type'].value_counts()
    logging.info('Totals per SMB operation type for this dataset:\n' + str(op_totals))
    logging.info(separator)

    top_ten_sections = (
        ('Top 10 users for this dataset:\n', 'user'),
        ('Top 10 shares for this dataset:\n', 'share'),
        ('Top 10 paths for this dataset:\n', 'path'),
    )
    for heading, column in top_ten_sections:
        most_frequent = df[column].value_counts().nlargest(10)
        logging.info(heading + str(most_frequent))
        logging.info(separator)

    plt.rc('legend', fontsize=6)
    # One line per operation type: rows are time buckets, columns are
    # operation types, values are the number of records in that bucket.
    time_bucket = df.local_time.dt.floor(time_interval)
    ops_per_bucket = df.groupby([time_bucket, 'smb_operation_type']).size().unstack()
    axes = ops_per_bucket.plot(colormap='nipy_spectral', x_compat=True)
    axes.legend(loc='best')
    plt.show()
| 94 | + |
def search_audit(ftr_file, search_field, search_string, show_smb_ops):
    """Log the rows of a feather file that match a text search.

    A row is reported when the value in ``search_field`` contains
    ``search_string`` (as evaluated by ``Series.str.contains``) and its
    ``smb_operation_type`` is one of ``show_smb_ops``.

    Args:
        ftr_file: Path of the feather file to load.
        search_field: Name of the column to search in.
        search_string: Pattern handed to ``Series.str.contains``.
        show_smb_ops: Collection of ``smb_operation_type`` values to keep.
    """
    df = pd.read_feather(ftr_file)
    logging.info('Loading FTR file at: ' + str(ftr_file))
    logging.info("=====================================================")
    show_ftr_details(df)

    field_matches = df[search_field].str.contains(search_string)
    op_is_shown = df['smb_operation_type'].isin(show_smb_ops)
    matching_rows = df[field_matches & op_is_shown]
    logging.info(matching_rows.to_string())
| 103 | + |
def smb_audit(args):
    """Run the action selected by ``args.function``.

    ``'Parse'`` calls ``parse_audit``, ``"Summarize"`` calls
    ``summarize_audit`` and ``"Search"`` calls ``search_audit``. When
    ``args.is_debug`` is truthy, the command line and every attribute of
    ``args`` are logged at DEBUG level first. A ``KeyboardInterrupt`` raised
    while running is caught and reported through the root logger.

    Args:
        args: Object holding the settings as attributes (``is_debug``,
            ``function`` and the attributes required by the chosen action).
    """
    # Attribute names on ``args``, one per SMB operation. For a search, an
    # operation is shown when its attribute is falsy; its value in the data
    # is the attribute name prefixed with "op=".
    op_flags = (
        'ACEChanged',
        'ACLAdded',
        'ACLDeleted',
        'AclDenied',
        'chown',
        'create',
        'createDenied',
        'delete',
        'deleteDenied',
        'move',
        'open',
        'OpenDenied',
        'setattrib',
        'setdacl',
        'write',
    )
    try:
        if args.is_debug:
            logging.debug("Usage:\n{0}\n".format(" ".join(sys.argv)))
            logging.debug("")
            logging.debug("All settings used:")
            for setting, value in sorted(vars(args).items()):
                logging.debug("{0}: {1}".format(setting, value))

        if args.function == 'Parse':
            parse_audit(args.source_directory, args.output_file)

        if args.function == "Summarize":
            summarize_audit(args.ftr_file, args.time_interval)

        if args.function == "Search":
            # Create list of SMB operations to show in the search results.
            show_smb_ops = ['op=' + flag for flag in op_flags if not getattr(args, flag)]
            search_audit(args.ftr_file, args.search_field, args.search_string, show_smb_ops)
    except KeyboardInterrupt:
        logging.getLogger().fatal('Cancelled by user.')
0 commit comments