Skip to content

Commit 80d8344

Browse files
committed
Releasing exclude_columns support in mysql
1 parent 7db9e1a commit 80d8344

File tree

14 files changed

+40
-37
lines changed

14 files changed

+40
-37
lines changed

fingerprint.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
Email: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"
2-
Phone: "\\b\\+\\d{1,3}[-.]?\\d{3}[-.]?\\d{4}\\b"
2+
Phone Number: "^\\(\\+\\d{1,2}\\s\\)?\\(?\\d{3}\\)?[\\s.-]\\d{3}[\\s.-]\\d{4}$"
33
Aadhar: "\\b\\d{4}[-.]?\\d{4}[-.]?\\d{4}\\b"
44
PAN Number: "[A-Z]{5}[0-9]{4}[A-Z]{1}"
55
Amazon MWS Auth Token: "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"

hawk_scanner/commands/couchdb.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
console = Console()
77

8-
def connect_couchdb(host, port, username, password, database):
8+
def connect_couchdb(args, host, port, username, password, database):
99
try:
1010
server = couchdb.Server(f"http://{username}:{password}@{host}:{port}/")
1111
if database not in server:
@@ -67,7 +67,7 @@ def execute(args):
6767
system.print_error(args, f"Incomplete CouchDB configuration for key: {key}")
6868
continue
6969

70-
db = connect_couchdb(host, port, username, password, database)
70+
db = connect_couchdb(args, host, port, username, password, database)
7171
if db:
7272
results += check_data_patterns(db, patterns, key, database)
7373
else:

hawk_scanner/commands/firebase.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from hawk_scanner.internals import system
66
import os
77

8-
def connect_firebase(credentials_file, bucket_name):
8+
def connect_firebase(args, credentials_file, bucket_name):
99
try:
1010
cred = credentials.Certificate(credentials_file)
1111
firebase_admin.initialize_app(cred)
@@ -31,7 +31,7 @@ def execute(args):
3131
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])
3232

3333
if credentials_file and bucket_name:
34-
bucket = connect_firebase(credentials_file, bucket_name)
34+
bucket = connect_firebase(args, credentials_file, bucket_name)
3535
if bucket:
3636
for blob in bucket.list_blobs():
3737
file_name = blob.name

hawk_scanner/commands/gcs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import time
88
import yaml
99

10-
def connect_google_cloud(bucket_name, credentials_file):
10+
def connect_google_cloud(args, bucket_name, credentials_file):
1111
try:
1212
## connect using credentials file
1313
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file
@@ -38,7 +38,7 @@ def execute(args):
3838
credentials_file = config.get('credentials_file')
3939

4040
if bucket_name:
41-
bucket = connect_google_cloud(bucket_name, credentials_file)
41+
bucket = connect_google_cloud(args, bucket_name, credentials_file)
4242
if bucket:
4343
for blob in bucket.list_blobs():
4444
file_name = blob.name

hawk_scanner/commands/gdrive.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from hawk_scanner.internals import system
66
from pydrive2.fs import GDriveFileSystem
77

8-
def connect_google_drive(credentials_file):
8+
def connect_google_drive(args, credentials_file):
99
credentials = open(credentials_file, 'r').read()
1010
credentials = json.loads(credentials)
1111
## if installed key is in the credentials file, use it
@@ -23,7 +23,7 @@ def connect_google_drive(credentials_file):
2323
print(f"Failed to connect to Google Drive: {e}")
2424
os.system("rm -rf client_secrets.json")
2525

26-
def download_file(drive, file_obj, base_path):
26+
def download_file(args, drive, file_obj, base_path):
2727
try:
2828
file_name = file_obj['title']
2929
file_id = file_obj['id']
@@ -84,7 +84,7 @@ def execute(args):
8484
folder_name = config.get('folder_name')
8585
exclude_patterns = config.get(key, {}).get('exclude_patterns', [])
8686
is_cache_enabled = config.get('cache', False)
87-
drive = connect_google_drive(credentials_file)
87+
drive = connect_google_drive(args, credentials_file)
8888
if not os.path.exists("data/google_drive"):
8989
os.makedirs("data/google_drive")
9090
if drive:

hawk_scanner/commands/gdrive_workspace.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def connect_google_drive(credentials_file, impersonate_user=None):
2222
except Exception as e:
2323
print(f"Failed to connect to Google Drive: {e}")
2424

25-
def download_file(drive, file_obj, base_path):
25+
def download_file(args, drive, file_obj, base_path):
2626
print(f"Downloading file: {file_obj['name']} to {base_path}")
2727
try:
2828
file_name = file_obj['name']
@@ -47,7 +47,7 @@ def download_file(drive, file_obj, base_path):
4747
os.makedirs(folder_path)
4848
folder_files = drive.files().list(q=f"'{file_id}' in parents").execute().get('files', [])
4949
for folder_file in folder_files:
50-
download_file(drive, folder_file, folder_path)
50+
download_file(args, drive, folder_file, folder_path)
5151
else:
5252
try:
5353
# Check if the file is a Google Docs type
@@ -126,7 +126,7 @@ def execute(args):
126126
is_cache_enabled = True
127127

128128
if is_cache_enabled:
129-
download_file(drive, file_obj, "data/google_drive/")
129+
download_file(args, drive, file_obj, "data/google_drive/")
130130

131131
matches = system.read_match_strings(args, file_path, 'gdrive_workspace')
132132
file_name = file_name.replace('-runtime.pdf', '')

hawk_scanner/commands/mongodb.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
console = Console()
66

7-
def connect_mongodb(host, port, username, password, database, uri=None):
7+
def connect_mongodb(args, host, port, username, password, database, uri=None):
88
try:
99
if uri:
1010
client = pymongo.MongoClient(uri)
@@ -23,7 +23,7 @@ def connect_mongodb(host, port, username, password, database, uri=None):
2323
return None
2424

2525

26-
def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_collections=None):
26+
def check_data_patterns(args, db, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_collections=None):
2727
results = []
2828
all_collections = db.list_collection_names()
2929

@@ -34,7 +34,7 @@ def check_data_patterns(db, patterns, profile_name, database_name, limit_start=0
3434

3535
for collection_name in collections_to_scan:
3636
if collection_name not in all_collections:
37-
system.print_warning(f"Collection {collection_name} not found in the database. Skipping.")
37+
system.print_error(args, f"Collection {collection_name} not found in the database. Skipping.")
3838
continue
3939

4040
collection = db[collection_name]
@@ -92,7 +92,7 @@ def execute(args):
9292

9393
db = connect_mongodb(host, port, username, password, database, uri)
9494
if db:
95-
results += check_data_patterns(db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections)
95+
results += check_data_patterns(args, db, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_collections=collections)
9696
else:
9797
system.print_error(args, "No MongoDB connection details found in connection.yml")
9898
else:

hawk_scanner/commands/mysql.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
console = Console()
66

7-
def connect_mysql(host, port, user, password, database):
7+
def connect_mysql(args, host, port, user, password, database):
88
try:
99
conn = pymysql.connect(
1010
host=host,
@@ -19,7 +19,7 @@ def connect_mysql(host, port, user, password, database):
1919
except Exception as e:
2020
system.print_error(args, f"Failed to connect to MySQL database at {host} with error: {e}")
2121

22-
def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None):
22+
def check_data_patterns(args, conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None, exclude_columns=None):
2323
cursor = conn.cursor()
2424

2525
# Get the list of tables to scan
@@ -40,6 +40,8 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start
4040
data_count = 1
4141
for row in cursor.fetchall():
4242
for column, value in zip(columns, row):
43+
if exclude_columns and column in exclude_columns:
44+
continue
4345
if value:
4446
value_str = str(value)
4547
matches = system.match_strings(args, value_str)
@@ -84,12 +86,13 @@ def execute(args):
8486
limit_start = config.get('limit_start', 0)
8587
limit_end = config.get('limit_end', 500)
8688
tables = config.get('tables', [])
89+
exclude_columns = config.get('exclude_columns', [])
8790

8891
if host and user and database:
8992
system.print_info(args, f"Checking MySQL Profile {key} and database {database}")
90-
conn = connect_mysql(host, port, user, password, database)
93+
conn = connect_mysql(args, host, port, user, password, database)
9194
if conn:
92-
results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables)
95+
results += check_data_patterns(args, conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables, exclude_columns=exclude_columns)
9396
conn.close()
9497
else:
9598
system.print_error(args, f"Incomplete MySQL configuration for key: {key}")

hawk_scanner/commands/postgresql.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
console = Console()
66

7-
def connect_postgresql(host, port, user, password, database):
7+
def connect_postgresql(args, host, port, user, password, database):
88
try:
99
conn = psycopg2.connect(
1010
host=host,
@@ -19,7 +19,7 @@ def connect_postgresql(host, port, user, password, database):
1919
except Exception as e:
2020
system.print_error(args, f"Failed to connect to PostgreSQL database at {host} with error: {e}")
2121

22-
def check_data_patterns(conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None):
22+
def check_data_patterns(args, conn, patterns, profile_name, database_name, limit_start=0, limit_end=500, whitelisted_tables=None):
2323
cursor = conn.cursor()
2424

2525
# Get the list of tables to scan
@@ -35,7 +35,7 @@ def check_data_patterns(conn, patterns, profile_name, database_name, limit_start
3535
results = []
3636
for table in tables_to_scan:
3737
if table not in all_tables:
38-
system.print_warning(f"Table {table} not found in the database. Skipping.")
38+
system.print_error(args, f"Table {table} not found in the database. Skipping.")
3939
continue
4040

4141
cursor.execute(f"SELECT * FROM {table} LIMIT {limit_end} OFFSET {limit_start}")
@@ -92,9 +92,9 @@ def execute(args):
9292

9393
if host and user and password and database:
9494
system.print_info(args, f"Checking PostgreSQL Profile {key}, database {database}")
95-
conn = connect_postgresql(host, port, user, password, database)
95+
conn = connect_postgresql(args, host, port, user, password, database)
9696
if conn:
97-
results += check_data_patterns(conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables)
97+
results += check_data_patterns(args, conn, patterns, key, database, limit_start=limit_start, limit_end=limit_end, whitelisted_tables=tables)
9898
conn.close()
9999
else:
100100
system.print_error(args, f"Incomplete PostgreSQL configuration for key: {key}")

hawk_scanner/commands/redis.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
console = Console()
88

9-
def connect_redis(host, port, password=None):
9+
def connect_redis(args, host, port, password=None):
1010
try:
1111
r = redis.Redis(host=host, port=port, password=password)
1212
if r.ping():
@@ -61,7 +61,7 @@ def execute(args):
6161
password = config.get('password')
6262

6363
if host:
64-
redis_instance = connect_redis(host, port, password)
64+
redis_instance = connect_redis(args, host, port, password)
6565
if redis_instance:
6666
results = check_data_patterns(redis_instance, patterns, profile_name, host)
6767
redis_instance.close()

0 commit comments

Comments
 (0)