Skip to content

Commit 94d6fbd

Browse files
committed
Added programmatic support in all modules
1 parent 5c1d8d9 commit 94d6fbd

File tree

16 files changed

+644
-606
lines changed

16 files changed

+644
-606
lines changed

hawk_scanner/commands/couchdb.py

Lines changed: 30 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def check_data_patterns(db, patterns, profile_name, database_name):
2525
for field_name, field_value in document.items():
2626
if field_value:
2727
value_str = str(field_value)
28-
matches = system.match_strings(value_str)
28+
matches = system.analyze_strings(value_str, 'couchdb')
2929
if matches:
3030
for match in matches:
3131
results.append({
@@ -42,36 +42,39 @@ def check_data_patterns(db, patterns, profile_name, database_name):
4242

4343
return results
4444

45-
def execute(args):
46-
results = []
47-
system.print_info(f"Running Checks for CouchDB Sources")
48-
connections = system.get_connection()
45+
def execute(args, programmatic=False):
46+
try:
47+
results = []
48+
system.print_info(f"Running Checks for CouchDB Sources")
49+
connections = system.get_connection(args, programmatic)
4950

50-
if 'sources' in connections:
51-
sources_config = connections['sources']
52-
couchdb_config = sources_config.get('couchdb')
51+
if 'sources' in connections:
52+
sources_config = connections['sources']
53+
couchdb_config = sources_config.get('couchdb')
5354

54-
if couchdb_config:
55-
patterns = system.get_fingerprint_file()
55+
if couchdb_config:
56+
patterns = system.get_fingerprint_file(args, programmatic)
5657

57-
for key, config in couchdb_config.items():
58-
host = config.get('host')
59-
port = config.get('port', 5984) # default CouchDB port
60-
username = config.get('username')
61-
password = config.get('password')
62-
database = config.get('database')
58+
for key, config in couchdb_config.items():
59+
host = config.get('host')
60+
port = config.get('port', 5984) # default CouchDB port
61+
username = config.get('username')
62+
password = config.get('password')
63+
database = config.get('database')
6364

64-
if host and username and password and database:
65-
system.print_info(f"Checking CouchDB Profile {key} with host and authentication")
66-
else:
67-
system.print_error(f"Incomplete CouchDB configuration for key: {key}")
68-
continue
65+
if host and username and password and database:
66+
system.print_info(f"Checking CouchDB Profile {key} with host and authentication")
67+
else:
68+
system.print_error(f"Incomplete CouchDB configuration for key: {key}")
69+
continue
6970

70-
db = connect_couchdb(host, port, username, password, database)
71-
if db:
72-
results += check_data_patterns(db, patterns, key, database)
71+
db = connect_couchdb(host, port, username, password, database)
72+
if db:
73+
results += check_data_patterns(db, patterns, key, database)
74+
else:
75+
system.print_error("No CouchDB connection details found in connection.yml")
7376
else:
74-
system.print_error("No CouchDB connection details found in connection.yml")
75-
else:
76-
system.print_error("No 'sources' section found in connection.yml")
77+
system.print_error("No 'sources' section found in connection.yml")
78+
except Exception as e:
79+
system.print_error(f"Failed to execute CouchDB checks with error: {e}")
7780
return results

hawk_scanner/commands/firebase.py

Lines changed: 64 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -15,75 +15,79 @@ def connect_firebase(credentials_file, bucket_name):
1515
except Exception as e:
1616
print(f"Failed to connect to Firebase bucket: {e}")
1717

18-
def execute(args):
19-
results = []
20-
shouldDownload = True
21-
connections = system.get_connection()
18+
def execute(args, programmatic=False):
19+
try:
20+
results = []
21+
shouldDownload = True
22+
connections = system.get_connection(args, programmatic)
23+
fingerprints = system.get_fingerprint_file(args, programmatic)
2224

23-
if 'sources' in connections:
24-
sources_config = connections['sources']
25-
firebase_config = sources_config.get('firebase')
25+
if 'sources' in connections:
26+
sources_config = connections['sources']
27+
firebase_config = sources_config.get('firebase')
2628

27-
if firebase_config:
28-
for key, config in firebase_config.items():
29-
credentials_file = config.get('credentials_file')
30-
bucket_name = config.get('bucket_name')
31-
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])
29+
if firebase_config:
30+
for key, config in firebase_config.items():
31+
credentials_file = config.get('credentials_file')
32+
bucket_name = config.get('bucket_name')
33+
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])
3234

33-
if credentials_file and bucket_name:
34-
bucket = connect_firebase(credentials_file, bucket_name)
35-
if bucket:
36-
for blob in bucket.list_blobs():
37-
file_name = blob.name
38-
## get unique etag or hash of file
39-
remote_etag = blob.etag
40-
system.print_debug(f"Remote etag: {remote_etag}")
35+
if credentials_file and bucket_name:
36+
bucket = connect_firebase(credentials_file, bucket_name)
37+
if bucket:
38+
for blob in bucket.list_blobs():
39+
file_name = blob.name
40+
## get unique etag or hash of file
41+
remote_etag = blob.etag
42+
system.print_debug(f"Remote etag: {remote_etag}")
4143

42-
if system.should_exclude_file(file_name, exclude_patterns):
43-
continue
44+
if system.should_exclude_file(file_name, exclude_patterns):
45+
continue
4446

45-
file_path = f"data/firebase/{remote_etag}-{file_name}"
46-
os.makedirs(os.path.dirname(file_path), exist_ok=True)
47+
file_path = f"data/firebase/{remote_etag}-{file_name}"
48+
os.makedirs(os.path.dirname(file_path), exist_ok=True)
4749

48-
if config.get("cache") == True:
49-
if os.path.exists(file_path):
50-
shouldDownload = False
51-
local_etag = file_path.split('/')[-1].split('-')[0]
52-
system.print_debug(f"Local etag: {local_etag}")
53-
system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml")
54-
if remote_etag != local_etag:
55-
system.print_debug(f"File in firebase bucket has changed, downloading it again...")
56-
shouldDownload = True
57-
else:
50+
if config.get("cache") == True:
51+
if os.path.exists(file_path):
5852
shouldDownload = False
53+
local_etag = file_path.split('/')[-1].split('-')[0]
54+
system.print_debug(f"Local etag: {local_etag}")
55+
system.print_debug(f"File already exists in cache, using it. You can disable cache by setting 'cache: false' in connection.yml")
56+
if remote_etag != local_etag:
57+
system.print_debug(f"File in firebase bucket has changed, downloading it again...")
58+
shouldDownload = True
59+
else:
60+
shouldDownload = False
5961

60-
if shouldDownload:
61-
file_path = f"data/firebase/{remote_etag}-{file_name}"
62-
system.print_debug(f"Downloading file: {file_name} to {file_path}...")
63-
blob.download_to_filename(file_path)
64-
65-
matches = system.read_match_strings(file_path, 'google_cloud_storage')
66-
if matches:
67-
for match in matches:
68-
results.append({
69-
'bucket': bucket_name,
70-
'file_path': file_name,
71-
'pattern_name': match['pattern_name'],
72-
'matches': match['matches'],
73-
'sample_text': match['sample_text'],
74-
'profile': key,
75-
'data_source': 'firebase'
76-
})
62+
if shouldDownload:
63+
file_path = f"data/firebase/{remote_etag}-{file_name}"
64+
system.print_debug(f"Downloading file: {file_name} to {file_path}...")
65+
blob.download_to_filename(file_path)
66+
67+
matches = system.analyze_file(file_path, 'google_cloud_storage', connections, fingerprints, programmatic=programmatic)
68+
if matches:
69+
for match in matches:
70+
results.append({
71+
'bucket': bucket_name,
72+
'file_path': file_name,
73+
'pattern_name': match['pattern_name'],
74+
'matches': match['matches'],
75+
'sample_text': match['sample_text'],
76+
'profile': key,
77+
'data_source': 'firebase'
78+
})
7779

80+
else:
81+
system.print_error(f"Failed to connect to Firebase bucket: {bucket_name}")
7882
else:
79-
system.print_error(f"Failed to connect to Firebase bucket: {bucket_name}")
80-
else:
81-
system.print_error(f"Incomplete Firebase configuration for key: {key}")
83+
system.print_error(f"Incomplete Firebase configuration for key: {key}")
84+
else:
85+
system.print_error("No Firebase connection details found in connection file")
8286
else:
83-
system.print_error("No Firebase connection details found in connection file")
84-
else:
85-
system.print_error("No 'sources' section found in connection.yml")
86-
87-
if config.get("cache") == False:
88-
os.system("rm -rf data/firebase")
87+
system.print_error("No 'sources' section found in connection.yml")
88+
89+
if config.get("cache") == False:
90+
os.system("rm -rf data/firebase")
91+
except Exception as e:
92+
print(f"Failed to connect to Firebase bucket: {e}")
8993
return results

hawk_scanner/commands/fs.py

Lines changed: 43 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,12 @@
1-
import argparse
1+
import argparse, os, concurrent.futures, time
22
from google.cloud import storage
33
from rich.console import Console
44
from hawk_scanner.internals import system
5-
import os
6-
import concurrent.futures
7-
import time
85

9-
def process_file(file_path, key, results):
10-
matches = system.read_match_strings(file_path, 'fs')
6+
def process_file(file_path, key, connections, fingerprints, programmatic=False):
7+
matches = system.analyze_file(file_path, 'fs', connections, fingerprints, programmatic=programmatic)
118
file_data = system.getFileData(file_path)
9+
results = []
1210
if matches:
1311
for match in matches:
1412
results.append({
@@ -21,43 +19,49 @@ def process_file(file_path, key, results):
2119
'data_source': 'fs',
2220
'file_data': file_data
2321
})
22+
return results
2423

25-
def execute(args):
26-
results = []
27-
connections = system.get_connection()
24+
def execute(args, programmatic=False):
25+
try:
26+
results = []
27+
connections = system.get_connection(args, programmatic)
28+
fingerprints = system.get_fingerprint_file(args, programmatic)
2829

29-
if 'sources' in connections:
30-
sources_config = connections['sources']
31-
fs_config = sources_config.get('fs')
32-
if fs_config:
33-
for key, config in fs_config.items():
34-
if 'path' not in config:
35-
system.print_error(f"Path not found in fs profile '{key}'")
36-
continue
37-
path = config.get('path')
38-
if not os.path.exists(path):
39-
system.print_error(f"Path '{path}' does not exist")
40-
41-
exclude_patterns = fs_config.get(key, {}).get('exclude_patterns', [])
42-
start_time = time.time()
43-
files = system.list_all_files_iteratively(path, exclude_patterns)
44-
45-
# Use ThreadPoolExecutor for parallel processing
46-
file_count = 0
47-
with concurrent.futures.ThreadPoolExecutor() as executor:
48-
futures = []
49-
for file_path in files:
50-
file_count += 1
51-
futures.append(executor.submit(process_file, file_path, key, results))
30+
if 'sources' in connections:
31+
sources_config = connections['sources']
32+
fs_config = sources_config.get('fs')
33+
if fs_config:
34+
for key, config in fs_config.items():
35+
if 'path' not in config:
36+
system.print_error(f"Path not found in fs profile '{key}'")
37+
continue
38+
path = config.get('path')
39+
if not os.path.exists(path):
40+
system.print_error(f"Path '{path}' does not exist")
41+
42+
exclude_patterns = fs_config.get(key, {}).get('exclude_patterns', [])
43+
start_time = time.time()
44+
files = system.list_all_files_iteratively(path, exclude_patterns)
5245

53-
# Wait for all tasks to complete
54-
concurrent.futures.wait(futures)
55-
end_time = time.time()
56-
system.print_info(f"Time taken to analyze {file_count} files: {end_time - start_time} seconds")
46+
# Use ThreadPoolExecutor for parallel processing
47+
file_count = 0
48+
with concurrent.futures.ThreadPoolExecutor() as executor:
49+
futures = []
50+
for file_path in files:
51+
file_count += 1
52+
results += process_file(file_path, key, connections, fingerprints, programmatic=programmatic)
53+
54+
# Wait for all tasks to complete
55+
concurrent.futures.wait(futures)
56+
end_time = time.time()
57+
elapsed_time = round(end_time - start_time, 2)
58+
system.print_info(f"Time taken to analyze {file_count} files: {elapsed_time} seconds")
59+
else:
60+
system.print_error("No filesystem 'fs' connection details found in connection.yml")
5761
else:
58-
system.print_error("No filesystem 'fs' connection details found in connection.yml")
59-
else:
60-
system.print_error("No 'sources' section found in connection.yml")
62+
system.print_error("No 'sources' section found in connection.yml")
63+
except Exception as e:
64+
system.print_error(f"Error in executing filesystem checks: {e}")
6165
return results
6266

6367
if __name__ == "__main__":

0 commit comments

Comments
 (0)