Skip to content

Commit 4d57320

Browse files
committed
Improved mongodb, mysql and postgres scan
1 parent 5343033 commit 4d57320

File tree

9 files changed

+266
-79
lines changed

9 files changed

+266
-79
lines changed

connection.yml.sample

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,23 @@ sources:
3636
port: YOUR_MYSQL_PORT
3737
user: YOUR_MYSQL_USERNAME
3838
password: YOUR_MYSQL_PASSWORD
39-
database: YOUR_MYSQL_DATABASE_NAME
39+
limit_start: 0 # Number of rows to skip in each table before scanning (used as the SQL OFFSET)
40+
limit_end: 500 # Maximum number of rows to scan per table (used as the SQL LIMIT; a row count, not an end index)
41+
tables:
42+
- table1
43+
- table2
4044
postgresql:
4145
postgresql_example:
4246
host: YOUR_POSTGRESQL_HOST
4347
port: YOUR_POSTGRESQL_PORT
4448
user: YOUR_POSTGRESQL_USERNAME
4549
password: YOUR_POSTGRESQL_PASSWORD
4650
database: YOUR_POSTGRESQL_DATABASE_NAME
51+
limit_start: 0 # Number of rows to skip in each table before scanning (used as the SQL OFFSET)
52+
limit_end: 500 # Maximum number of rows to scan per table (used as the SQL LIMIT; a row count, not an end index)
53+
tables:
54+
- table1
55+
- table2
4756
mongodb:
4857
mongodb_example:
4958
uri: YOUR_MONGODB_URI
@@ -52,6 +61,12 @@ sources:
5261
username: YOUR_MONGODB_USERNAME
5362
password: YOUR_MONGODB_PASSWORD
5463
database: YOUR_MONGODB_DATABASE_NAME
64+
uri: YOUR_MONGODB_URI # Use either the URI or the individual connection parameters; `uri` also appears earlier in this profile, so keep a single entry
65+
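limit_start: 0 # Number of documents to skip in each collection before scanning (passed to skip())
66+
limit_end: 500 # Maximum number of documents to scan per collection (passed to limit(); a count, not an end index)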
67+
collections:
68+
- collection1
69+
- collection2
5570
fs:
5671
fs_example:
5772
path: /path/to/your/filesystem/directory

hawk_scanner/commands/couchdb.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import couchdb
2+
from hawk_scanner.internals import system
3+
from rich.console import Console
4+
from rich.table import Table
5+
6+
console = Console()
7+
8+
def connect_couchdb(host, port, username, password, database):
    """Open a CouchDB database and return its handle, or None on failure.

    Args:
        host: Hostname or IP address of the CouchDB server.
        port: TCP port of the CouchDB server.
        username: Account name embedded in the server URL.
        password: Password for ``username``.
        database: Name of the database to open.

    Returns:
        The ``server[database]`` object, or ``None`` when the database is
        not present on the server or any exception is raised (the problem
        is reported through ``system.print_error``).
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    try:
        # Percent-encode the credentials: characters such as '@', ':' or '/'
        # would otherwise be read as URL delimiters and corrupt the address.
        # NOTE(review): relies on couchdb-python percent-decoding credentials
        # it extracts from the URL - confirm against the installed version.
        user_part = quote(str(username), safe='')
        pass_part = quote(str(password), safe='')
        server = couchdb.Server(f"http://{user_part}:{pass_part}@{host}:{port}/")

        if database not in server:
            system.print_error(f"Database {database} not found on CouchDB server.")
            return None

        db = server[database]
        system.print_info("Connected to CouchDB database")
        return db
    except Exception as e:
        # Best-effort connection: report the failure and let the caller skip
        # this profile instead of aborting the whole scan.
        system.print_error(f"Failed to connect to CouchDB database with error: {e}")
        return None
20+
21+
def check_data_patterns(db, patterns, profile_name, database_name):
    """Scan every document of a CouchDB database for fingerprint matches.

    Each truthy field value is converted to ``str`` and passed to
    ``system.match_strings``; one result entry is produced per match.

    Args:
        db: Database handle as returned by ``connect_couchdb``.
        patterns: Fingerprint patterns; accepted for interface parity but not
            read here (``system.match_strings`` is called with the text only).
        profile_name: Name of the connection profile being scanned.
        database_name: Name of the database, copied into each result.

    Returns:
        A list of dicts with the keys ``host``, ``database``, ``document_id``,
        ``field``, ``pattern_name``, ``matches``, ``sample_text``, ``profile``
        and ``data_source``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import urlsplit

    # Build the "host:port" label from the resource URL. The previous code
    # indexed ``db.resource.credentials`` (a username/password pair), which
    # exposed the password and failed on the out-of-range third element.
    # NOTE(review): assumes ``db.resource.url`` holds the database URL, as in
    # couchdb-python's Resource class - confirm.
    netloc = urlsplit(db.resource.url).netloc
    host_label = netloc.rpartition('@')[2]  # drop any "user:pass@" prefix

    results = []
    for doc_id in db:
        document = db[doc_id]
        for field_name, field_value in document.items():
            if not field_value:
                continue

            matches = system.match_strings(str(field_value))
            if not matches:
                continue

            for match in matches:
                results.append({
                    'host': host_label,
                    'database': database_name,
                    'document_id': doc_id,
                    'field': field_name,
                    'pattern_name': match['pattern_name'],
                    'matches': match['matches'],
                    'sample_text': match['sample_text'],
                    'profile': profile_name,
                    'data_source': 'couchdb'
                })

    return results
44+
45+
def execute(args):
    """Run the CouchDB scan for every profile listed in connection.yml.

    Reads the ``sources.couchdb`` section of the connection settings,
    connects to each fully specified profile and collects the matches
    reported by ``check_data_patterns``.

    Args:
        args: Parsed command-line arguments; not read by this function.

    Returns:
        A list with the findings of all profiles (empty when nothing was
        scanned or nothing matched).
    """
    findings = []
    system.print_info("Running Checks for CouchDB Sources")
    connections = system.get_connection()

    # Guard clauses: bail out early when the configuration is unusable.
    if 'sources' not in connections:
        system.print_error("No 'sources' section found in connection.yml")
        return findings

    couchdb_config = connections['sources'].get('couchdb')
    if not couchdb_config:
        system.print_error("No CouchDB connection details found in connection.yml")
        return findings

    patterns = system.get_fingerprint_file()

    for key, config in couchdb_config.items():
        host = config.get('host')
        port = config.get('port', 5984)  # default CouchDB port
        username = config.get('username')
        password = config.get('password')
        database = config.get('database')

        # Every profile needs host, credentials and a database name.
        if not (host and username and password and database):
            system.print_error(f"Incomplete CouchDB configuration for key: {key}")
            continue
        system.print_info(f"Checking CouchDB Profile {key} with host and authentication")

        db = connect_couchdb(host, port, username, password, database)
        if db:
            findings.extend(check_data_patterns(db, patterns, key, database))

    return findings

hawk_scanner/commands/mongodb.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import pymongo
22
from hawk_scanner.internals import system
3-
import re
43
from rich.console import Console
5-
from rich.table import Table
64

75
console = Console()
86

@@ -25,11 +23,11 @@ def connect_mongodb(host, port, username, password, database, uri=None):
2523
return None
2624

2725

28-
def check_data_patterns(db, patterns, profile_name, database_name):
26+
def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0, limit_end=500):
2927
results = []
3028
for collection_name in db.list_collection_names():
3129
collection = db[collection_name]
32-
for document in collection.find():
30+
for document in collection.find().limit(limit_end).skip(limit_start):
3331
for field_name, field_value in document.items():
3432
if field_value:
3533
value_str = str(field_value)
@@ -69,6 +67,8 @@ def execute(args):
6967
password = config.get('password')
7068
database = config.get('database')
7169
uri = config.get('uri') # Added support for URI
70+
limit_start = config.get('limit_start', 0)
71+
limit_end = config.get('limit_end', 500)
7272

7373
if uri:
7474
system.print_info(f"Checking MongoDB Profile {key} using URI")
@@ -80,7 +80,7 @@ def execute(args):
8080

8181
db = connect_mongodb(host, port, username, password, database, uri)
8282
if db:
83-
results += check_data_patterns(db, patterns, key, database)
83+
results += check_data_patterns(db, patterns, key, database, limit_start=limit_start, limit_end=limit_end)
8484
else:
8585
system.print_error("No MongoDB connection details found in connection.yml")
8686
else:

hawk_scanner/commands/mysql.py

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,37 @@
1-
import psycopg2
1+
import pymysql
22
from hawk_scanner.internals import system
3-
import re
43
from rich.console import Console
5-
from rich.table import Table
64

75
console = Console()
86

9-
def connect_postgresql(host, port, user, password, database):
7+
def connect_mysql(host, port, user, password, database):
108
try:
11-
conn = psycopg2.connect(
9+
conn = pymysql.connect(
1210
host=host,
1311
port=port,
1412
user=user,
1513
password=password,
1614
database=database
1715
)
1816
if conn:
19-
system.print_info(f"Connected to PostgreSQL database at {host}")
17+
system.print_info(f"Connected to MySQL database at {host}")
2018
return conn
2119
except Exception as e:
22-
system.print_error(f"Failed to connect to PostgreSQL database at {host} with error: {e}")
20+
system.print_error(f"Failed to connect to MySQL database at {host} with error: {e}")
2321

24-
def check_data_patterns(conn, patterns, profile_name, database_name):
22+
def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, tables=None):
2523
cursor = conn.cursor()
26-
cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")
24+
25+
# Get the list of tables to scan
26+
cursor.execute("SHOW TABLES")
2727
tables = [table[0] for table in cursor.fetchall()]
28+
tables_to_scan = tables or [] # Use all tables if tables[] is blank or not provided
2829

2930
table_count = 1
3031

3132
results = []
32-
for table in tables:
33-
cursor.execute(f"SELECT * FROM {table}")
33+
for table in tables_to_scan:
34+
cursor.execute(f"SELECT * FROM {table} LIMIT {limit_end} OFFSET {limit_start}")
3435
columns = [column[0] for column in cursor.description]
3536

3637
data_count = 1
@@ -42,15 +43,15 @@ def check_data_patterns(conn, patterns, profile_name, database_name):
4243
if matches:
4344
for match in matches:
4445
results.append({
45-
'host': conn.dsn,
46+
'host': conn.get_host_info(),
4647
'database': database_name,
4748
'table': table,
4849
'column': column,
4950
'pattern_name': match['pattern_name'],
5051
'matches': match['matches'],
5152
'sample_text': match['sample_text'],
5253
'profile': profile_name,
53-
'data_source': 'postgresql'
54+
'data_source': 'mysql'
5455
})
5556

5657
data_count += 1
@@ -62,33 +63,40 @@ def check_data_patterns(conn, patterns, profile_name, database_name):
6263

6364
def execute(args):
6465
results = []
65-
system.print_info(f"Running Checks for PostgreSQL Sources")
66+
system.print_info(f"Running Checks for MySQL Sources")
6667
connections = system.get_connection()
6768

6869
if 'sources' in connections:
6970
sources_config = connections['sources']
70-
postgresql_config = sources_config.get('postgresql')
71+
mysql_config = sources_config.get('mysql')
7172

72-
if postgresql_config:
73+
if mysql_config:
7374
patterns = system.get_fingerprint_file()
7475

75-
for key, config in postgresql_config.items():
76+
for key, config in mysql_config.items():
7677
host = config.get('host')
7778
user = config.get('user')
78-
port = config.get('port', 5432) # default port for PostgreSQL
79+
port = config.get('port', 3306) # default port for MySQL
7980
password = config.get('password')
8081
database = config.get('database')
82+
limit_start = config.get('limit_start', 0)
83+
limit_end = config.get('limit_end', 500)
84+
tables = config.get('tables', [])
8185

8286
if host and user and password and database:
83-
system.print_info(f"Checking PostgreSQL Profile {key} and database {database}")
84-
conn = connect_postgresql(host, port, user, password, database)
87+
system.print_info(f"Checking MySQL Profile {key} and database {database}")
88+
conn = connect_mysql(host, port, user, password, database)
8589
if conn:
86-
results += check_data_patterns(conn, patterns, key, database)
90+
results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, tables=tables)
8791
conn.close()
8892
else:
89-
system.print_error(f"Incomplete PostgreSQL configuration for key: {key}")
93+
system.print_error(f"Incomplete MySQL configuration for key: {key}")
9094
else:
91-
system.print_error("No PostgreSQL connection details found in connection.yml")
95+
system.print_error("No MySQL connection details found in connection.yml")
9296
else:
9397
system.print_error("No 'sources' section found in connection.yml")
94-
return results
98+
return results
99+
100+
# Example usage
101+
if __name__ == "__main__":
102+
execute(None)

hawk_scanner/commands/postgresql.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import psycopg2
22
from hawk_scanner.internals import system
33
from rich.console import Console
4-
from rich.table import Table
54

65
console = Console()
76

@@ -20,14 +19,23 @@ def connect_postgresql(host, port, user, password, database):
2019
except Exception as e:
2120
system.print_error(f"Failed to connect to PostgreSQL database at {host} with error: {e}")
2221

23-
def check_data_patterns(conn, patterns, profile_name, database_name):
22+
def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, tables=None):
2423
cursor = conn.cursor()
24+
25+
# Get the list of tables to scan
2526
cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")
26-
tables = [table[0] for table in cursor.fetchall()]
27+
all_tables = [table[0] for table in cursor.fetchall()]
28+
tables_to_scan = tables or all_tables # Use all tables if tables[] is blank or not provided
29+
2730
table_count = 1
31+
2832
results = []
29-
for table in tables:
30-
cursor.execute(f"SELECT * FROM {table}")
33+
for table in tables_to_scan:
34+
if table not in all_tables:
35+
system.print_warning(f"Table {table} not found in the database. Skipping.")
36+
continue
37+
38+
cursor.execute(f"SELECT * FROM {table} LIMIT {limit_end} OFFSET {limit_start}")
3139
columns = [column[0] for column in cursor.description]
3240

3341
data_count = 1
@@ -75,12 +83,15 @@ def execute(args):
7583
port = config.get('port', 5432) # default port for PostgreSQL
7684
password = config.get('password')
7785
database = config.get('database')
86+
limit_start = config.get('limit_start', 0)
87+
limit_end = config.get('limit_end', 500)
88+
tables = config.get('tables', [])
7889

7990
if host and user and password and database:
80-
system.print_info(f"Checking PostgreSQL Profile {key} and database {database}")
91+
system.print_info(f"Checking PostgreSQL Profile {key}, database {database}")
8192
conn = connect_postgresql(host, port, user, password, database)
8293
if conn:
83-
results += check_data_patterns(conn, patterns, key, database)
94+
results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, tables=tables)
8495
conn.close()
8596
else:
8697
system.print_error(f"Incomplete PostgreSQL configuration for key: {key}")
@@ -89,3 +100,7 @@ def execute(args):
89100
else:
90101
system.print_error("No 'sources' section found in connection.yml")
91102
return results
103+
104+
# Example usage
105+
if __name__ == "__main__":
106+
execute(None)

hawk_scanner/internals/system.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from androguard.misc import AnalyzeAPK
12
from rich.console import Console
23
from rich.table import Table
34
import json, requests, argparse, yaml, re, datetime, os, subprocess, platform, hashlib
@@ -266,6 +267,9 @@ def read_match_strings(file_path, source):
266267
# Check if the file is an archive (zip, rar, tar, tar.gz)
267268
elif file_path.lower().endswith(('.zip', '.rar', '.tar', '.tar.gz')):
268269
content = read_archive(file_path)
270+
# Check if the file is an APK (Android application package)
271+
elif file_path.lower().endswith('.apk'):
272+
content = read_apk(file_path)
269273
else:
270274
# For other file types, read content normally
271275
with open(file_path, 'rb') as file:
@@ -278,6 +282,26 @@ def read_match_strings(file_path, source):
278282
matched_strings = match_strings(content)
279283
return matched_strings
280284

285+
def read_apk(file_path):
    """Return the strings found in an APK file, joined by single spaces.

    The file is analysed with androguard's ``AnalyzeAPK``. Any exception is
    reported through ``print_debug`` and an empty string is returned.

    Args:
        file_path: Path of the APK file to analyse.

    Returns:
        A single string with all collected strings separated by spaces, or
        ``''`` when the analysis fails.
    """
    try:
        # AnalyzeAPK returns three objects; only the last one is used here.
        _apk, _dex, analysis = AnalyzeAPK(file_path)

        # NOTE(review): presumably each object yielded by get_methods() offers
        # get_strings() returning 4-tuples - verify against the installed
        # androguard; if it does not, the except branch below always returns ''.
        collected = [
            text
            for method in analysis.get_methods()
            for _, _, _, text in method.get_strings()
        ]
        return ' '.join(collected)
    except Exception as e:
        print_debug(f"Error in read_apk: {e}")
        return ''
304+
281305

282306
def read_pdf(file_path):
283307
content = ''

0 commit comments

Comments
 (0)