Skip to content

Commit 54be87e

Browse files
committed
Added PAN Regex, File owner details in fs, unique alerts only
1 parent 63675ea commit 54be87e

File tree

8 files changed

+170
-25
lines changed

8 files changed

+170
-25
lines changed

connection.yml.sample

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
notify:
2+
redacted: True
23
slack:
34
webhook_url: https://hooks.slack.com/services/T0XXXXXXXXXXX/BXXXXXXXX/1CIyXXXXXXXXXXXXXXX
45

fingerprint.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
Email: "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"
22
Phone: "\\b\\+\\d{1,3}[-.]?\\d{3}[-.]?\\d{4}\\b"
33
Aadhar: "\\b\\d{4}[-.]?\\d{4}[-.]?\\d{4}\\b"
4+
# Word boundaries keep the pattern from matching inside longer alphanumeric tokens.
PAN Number: "\\b[A-Z]{5}[0-9]{4}[A-Z]\\b"
45
Amazon MWS Auth Token: "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
56
Amazon SNS Topic Disclosure: "arn:aws:sns:[a-z0-9\\-]+:[0-9]+:[A-Za-z0-9\\-_]+"
67
AWS Access Key ID Value: "(A3T[A-Z0-9]|AKIA|AGPA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}"
@@ -31,4 +32,4 @@ Shopify Shared Secret: "shpss_[a-fA-F0-9]{32}"
3132
Square Accesss Token: "sq0atp-[0-9A-Za-z\\-_]{22}"
3233
Square OAuth Secret: "sq0csp-[0-9A-Za-z\\-_]{43}"
3334
Twilio API Key: "(?i)twilio(.{0,20})?SK[0-9a-f]{32}"
34-
Twitter Secret: "(?i)twitter(.{0,20})?[0-9a-z]{35,44}"
35+
Twitter Secret: "(?i)twitter(.{0,20})?[0-9a-z]{35,44}"

hawk_scanner/commands/fs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
def process_file(file_path, key, results):
1010
matches = system.read_match_strings(file_path, 'fs')
11+
file_data = system.getFileData(file_path)
1112
if matches:
1213
for match in matches:
1314
results.append({
@@ -17,7 +18,8 @@ def process_file(file_path, key, results):
1718
'matches': match['matches'],
1819
'sample_text': match['sample_text'],
1920
'profile': key,
20-
'data_source': 'fs'
21+
'data_source': 'fs',
22+
'file_data': file_data
2123
})
2224

2325
def execute(args):

hawk_scanner/internals/system.py

Lines changed: 131 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
from rich.console import Console
22
from rich.table import Table
3-
import yaml
4-
import re
5-
import os
6-
import argparse
7-
import requests
8-
import json
3+
import json, requests, argparse, yaml, re, datetime, os, subprocess, platform, hashlib
4+
from tinydb import TinyDB, Query
5+
from pwd import getpwuid
6+
7+
# Create a TinyDB instance for storing previous alert hashes
8+
db = TinyDB('previous_alerts.json')
99

10-
console = Console()
1110
parser = argparse.ArgumentParser(description='🦅 A powerful scanner to scan your Filesystem, S3, MySQL, PostgreSQL, MongoDB, Redis, Google Cloud Storage and Firebase storage for PII and sensitive data.')
1211
parser.add_argument('--connection', action='store', help='YAML Connection file path')
1312
parser.add_argument('--fingerprint', action='store', help='Override YAML fingerprint file path')
@@ -17,6 +16,11 @@
1716
args, extra_args = parser.parse_known_args()
1817

1918

19+
console = Console()
20+
21+
def calculate_msg_hash(msg):
    """Return the hex-encoded SHA-256 digest of the string ``msg``."""
    hasher = hashlib.sha256()
    hasher.update(msg.encode())
    return hasher.hexdigest()
23+
2024
def print_info(message):
2125
console.print(f"[yellow][INFO][/yellow] {message}")
2226

@@ -35,6 +39,60 @@ def print_info(message):
3539
def print_alert(message):
    """Print ``message`` to the console behind a bold red ``[ALERT]`` tag."""
    alert_line = f"[bold red][ALERT][/bold red] {message}"
    console.print(alert_line)
3741

42+
def get_file_owner(file_path):
    """Return a printable owner for ``file_path``, or "" when it is unknown.

    On Windows the output of ``dir /q`` is parsed and the last token before
    the file name is taken to be the owner. On every other platform the
    result is ``"<username> (<uid>)"``, built from ``os.stat`` and the
    password database; if the uid has no passwd entry the bare uid is
    returned as a string instead.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The owner description, or an empty string if the file cannot be
        inspected.
    """
    if platform.system() == "Windows":
        file_name = os.path.basename(file_path)
        if not file_name:
            return ""
        try:
            # `dir` is a cmd.exe builtin, so it has to go through the shell.
            listing = subprocess.check_output(
                ['dir', '/q', file_path], shell=True, text=True
            )
        except (subprocess.CalledProcessError, OSError):
            return ""

        # Scan bottom-up so the file's own entry wins over an earlier
        # header line that happens to contain the same name.
        for line in reversed(listing.splitlines()):
            name_at = line.rfind(file_name)
            if name_at == -1:
                continue
            columns = line[:name_at].split()
            return columns[-1] if columns else ""
        return ""

    try:
        uid = os.stat(file_path).st_uid
    except OSError:
        return ""

    try:
        # Imported lazily: the pwd module is not available on Windows, so
        # this function must not depend on a module-level import of it.
        import pwd

        return f"{pwd.getpwuid(uid).pw_name} ({uid})"
    except (ImportError, KeyError):
        # KeyError: the uid has no entry in the password database.
        return str(uid)
72+
73+
def RedactData(input_string):
    """Mask the middle half of ``input_string`` with ``*`` characters.

    Exactly ``len(input_string) // 2`` characters are replaced, centred in
    the string (the extra unmasked character, if any, stays on the right).
    Strings shorter than three characters are returned unchanged.

    Args:
        input_string: The text to redact.

    Returns:
        A string of the same length with its middle section masked.
    """
    length = len(input_string)
    if length < 3:
        return input_string

    redact_count = length // 2
    # Derive the window from the count itself; halving the count twice
    # (once per side) loses a character whenever the count is odd and
    # masks nothing at all for a three-character string.
    middle_start = (length - redact_count) // 2
    middle_end = middle_start + redact_count

    return (
        input_string[:middle_start]
        + "*" * redact_count
        + input_string[middle_end:]
    )
95+
3896
def get_connection():
3997
if args.connection:
4098
if os.path.exists(args.connection):
@@ -112,18 +170,36 @@ def print_banner():
112170
if not args.shutup:
113171
console.print(banner)
114172

173+
connections = get_connection()
174+
115175
def match_strings(content):
    """Run every fingerprint pattern against ``content`` and collect the hits.

    Reads the module-level ``patterns`` mapping (pattern name -> regex
    source) and the module-level ``connections`` configuration. When
    ``notify.redacted`` is truthy in the configuration, both the matched
    values and the sample text are passed through ``RedactData``.

    Args:
        content: The text to scan.

    Returns:
        A list with one dict per pattern that matched, each holding
        ``pattern_name``, ``matches`` and ``sample_text`` (the first 50
        characters of ``content``).
    """
    # Always bind `redacted`: a configuration without a `notify` section
    # (or with an empty one) must mean "do not redact", not a NameError.
    notify_config = connections.get('notify') or {}
    redacted = bool(notify_config.get('redacted', False))

    matched_strings = []
    for pattern_name, pattern_regex in patterns.items():
        print_debug(f"Matching pattern: {pattern_name}")
        complied_regex = re.compile(pattern_regex, re.IGNORECASE)
        matches = complied_regex.findall(content)
        if not matches:
            continue

        sample_text = content[:50]
        if redacted:
            matches = [RedactData(match) for match in matches]
            sample_text = RedactData(sample_text)

        matched_strings.append({
            'pattern_name': pattern_name,
            'matches': matches,
            'sample_text': sample_text,
        })
    return matched_strings
129205

@@ -164,18 +240,61 @@ def read_match_strings(file_path, source):
164240
matched_strings = match_strings(content)
165241
return matched_strings
166242

167-
def SlackNotify(msg):
168-
connections = get_connection()
243+
def getFileData(file_path):
    """Collect owner and timestamp metadata for ``file_path``.

    Args:
        file_path: Path of the file to describe.

    Returns:
        A dict with the keys ``creator``, ``created_time`` and
        ``modified_time``; the timestamps are local times formatted as
        ``%Y-%m-%d %H:%M:%S``. If the metadata cannot be read, the same
        three keys are present with empty strings and an extra ``error``
        key describes the failure, so callers can index the result without
        checking its type first.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    try:
        file_stat = os.stat(file_path)
        creator_name = get_file_owner(file_path)

        # st_ctime is the metadata-change time on most Unix systems; use
        # the real creation time where the platform exposes it.
        created_stamp = getattr(file_stat, "st_birthtime", file_stat.st_ctime)
        created_time = datetime.datetime.fromtimestamp(created_stamp)
        modified_time = datetime.datetime.fromtimestamp(file_stat.st_mtime)

        return {
            "creator": creator_name,
            "created_time": created_time.strftime(time_format),
            "modified_time": modified_time.strftime(time_format),
        }
    except FileNotFoundError:
        error = "File not found"
    except Exception as e:
        # Best effort: missing metadata must never abort a scan.
        error = str(e)

    return {
        "creator": "",
        "created_time": "",
        "modified_time": "",
        "error": error,
    }
267+
268+
269+
def SlackNotify(msg):
    """Post ``msg`` to the Slack webhook configured under ``notify.slack``.

    Does nothing when the configuration has no ``notify`` section or no
    ``webhook_url``. When ``notify.suppress_duplicates`` is truthy, the
    SHA-256 of the message is looked up in the alert database first and an
    already-recorded message is skipped; the hash is recorded only after
    Slack has accepted the request.

    Args:
        msg: The alert text to send.
    """
    if 'notify' not in connections:
        return

    notify_config = connections['notify'] or {}
    suppress_duplicates = notify_config.get('suppress_duplicates', False)

    msg_hash = None
    if suppress_duplicates:
        msg_hash = calculate_msg_hash(msg)
        Alert = Query()
        if db.contains(Alert.msg_hash == msg_hash):
            print_debug("Duplicate message detected. Skipping webhook trigger.")
            return

    slack_config = notify_config.get('slack') or {}
    webhook_url = slack_config.get('webhook_url', '')
    if not webhook_url:
        return

    try:
        payload = {
            'text': msg,
        }
        headers = {'Content-Type': 'application/json'}
        # A timeout keeps an unresponsive webhook from hanging the scan.
        response = requests.post(
            webhook_url,
            data=json.dumps(payload),
            headers=headers,
            timeout=10,
        )
        # Treat HTTP errors as failures so that an undelivered alert is not
        # recorded as sent and then suppressed on every later run.
        response.raise_for_status()

        if suppress_duplicates:
            db.insert({'msg_hash': msg_hash})
    except Exception as e:
        print_error(f"An error occurred: {str(e)}")

hawk_scanner/main.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,14 @@ def main():
115115
)
116116
AlertMsg = """
117117
*** PII Or Secret Found ***
118-
Data Source: S3 Bucket
118+
Data Source: S3 Bucket - {vulnerable_profile}
119119
Bucket: {bucket}
120120
File Path: {file_path}
121121
Pattern Name: {pattern_name}
122122
Total Exposed: {total_exposed}
123123
Exposed Values: {exposed_values}
124124
""".format(
125+
vulnerable_profile=result['profile'],
125126
bucket=result['bucket'],
126127
file_path=result['file_path'],
127128
pattern_name=result['pattern_name'],
@@ -145,7 +146,7 @@ def main():
145146
# Slack notification for MySQL
146147
AlertMsg = """
147148
*** PII Or Secret Found ***
148-
Data Source: MySQL
149+
Data Source: MySQL - {vulnerable_profile}
149150
Host: {host}
150151
Database: {database}
151152
Table: {table}
@@ -154,6 +155,7 @@ def main():
154155
Total Exposed: {total_exposed}
155156
Exposed Values: {exposed_values}
156157
""".format(
158+
vulnerable_profile=result['profile'],
157159
host=result['host'],
158160
database=result['database'],
159161
table=result['table'],
@@ -179,7 +181,7 @@ def main():
179181
# Slack notification for MongoDB
180182
AlertMsg = """
181183
*** PII Or Secret Found ***
182-
Data Source: MongoDB
184+
Data Source: MongoDB - {vulnerable_profile}
183185
Host: {host}
184186
Database: {database}
185187
Collection: {collection}
@@ -188,6 +190,7 @@ def main():
188190
Total Exposed: {total_exposed}
189191
Exposed Values: {exposed_values}
190192
""".format(
193+
vulnerable_profile=result['profile'],
191194
host=result['host'],
192195
database=result['database'],
193196
collection=result['collection'],
@@ -213,7 +216,7 @@ def main():
213216
# Slack notification for PostgreSQL
214217
AlertMsg = """
215218
*** PII Or Secret Found ***
216-
Data Source: PostgreSQL
219+
Data Source: PostgreSQL - {vulnerable_profile}
217220
Host: {host}
218221
Database: {database}
219222
Table: {table}
@@ -222,6 +225,7 @@ def main():
222225
Total Exposed: {total_exposed}
223226
Exposed Values: {exposed_values}
224227
""".format(
228+
vulnerable_profile=result['profile'],
225229
host=result['host'],
226230
database=result['database'],
227231
table=result['table'],
@@ -245,13 +249,14 @@ def main():
245249
)
246250
AlertMsg = """
247251
*** PII Or Secret Found ***
248-
Data Source: Redis
252+
Data Source: Redis - {vulnerable_profile}
249253
Host: {host}
250254
Key: {key}
251255
Pattern Name: {pattern_name}
252256
Total Exposed: {total_exposed}
253257
Exposed Values: {exposed_values}
254258
""".format(
259+
vulnerable_profile=result['profile'],
255260
host=result['host'],
256261
key=result['key'],
257262
pattern_name=result['pattern_name'],
@@ -274,13 +279,14 @@ def main():
274279
# Slack notification for Firebase/GCS
275280
AlertMsg = """
276281
*** PII Or Secret Found ***
277-
Data Source: Firebase/GCS
282+
Data Source: Firebase/GCS - {vulnerable_profile}
278283
Bucket: {bucket}
279284
File Path: {file_path}
280285
Pattern Name: {pattern_name}
281286
Total Exposed: {total_exposed}
282287
Exposed Values: {exposed_values}
283288
""".format(
289+
vulnerable_profile=result['profile'],
284290
bucket=result['bucket'],
285291
file_path=result['file_path'],
286292
pattern_name=result['pattern_name'],
@@ -302,18 +308,24 @@ def main():
302308
)
303309
AlertMsg = """
304310
*** PII Or Secret Found ***
305-
Data Source: File System
306-
File Path: {file_path}
311+
Data Source: File System - {vulnerable_profile}
312+
File Path: {file_path},
313+
File Creator: {file_creator},
314+
File Created at : {file_created},
315+
File Last Modified at : {file_last_modified},
307316
Pattern Name: {pattern_name}
308317
Total Exposed: {total_exposed}
309318
Exposed Values: {exposed_values}
310319
""".format(
320+
file_creator = result['file_data']['creator'],
321+
file_created = result['file_data']['created_time'],
322+
file_last_modified = result['file_data']['modified_time'],
323+
vulnerable_profile=result['profile'],
311324
file_path=result['file_path'],
312325
pattern_name=result['pattern_name'],
313326
total_exposed=str(len(result['matches'])),
314327
exposed_values=str(', '.join(result['matches']))
315328
)
316-
317329
system.SlackNotify(AlertMsg)
318330
else:
319331
# Handle other cases or do nothing for unsupported groups

previous_alerts.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"_default": {"1": {"msg_hash": "d6906dff68674fbdd59ded71ddca02f992d399111a256fbd87ac18e7a9205edb"}, "2": {"msg_hash": "dad68cd597c5c2236a978b928486a34a0ce1b5884e469760f1d2f8fec2793c9b"}}}

readme.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,20 @@ See how this works on Youtube - https://youtu.be/LuPXE7UJKOY
3939
pip3 install hawk-scanner
4040
```
4141

42-
## Example working command (To scan all supported services, or use fs/s3/gcs etc...)
42+
43+
## Example working command (Use all/fs/s3/gcs etc...)
4344
```bash
4445
hawk_scanner all --connection connection.yml --fingerprint fingerprint.yml --json output.json --debug
4546
```
4647

48+
### Note: Scanning PostgreSQL? You have to install an extra dependency.
49+
50+
To scan a PostgreSQL source, this tool requires the psycopg2-binary dependency. We can't ship it with the main package because psycopg2-binary does not work on many systems, especially Windows, so you have to install it manually.
51+
52+
```bash
53+
pip3 install psycopg2-binary
54+
```
55+
4756
## Building or running from source
4857

4958
HAWK Eye is a Python-based CLI tool that can be installed using the following steps:

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@ redis
66
firebase-admin
77
google-cloud-core
88
google-cloud-storage
9-
# psycopg2-binary
109
pymongo==3.13.0
10+
tinydb==4.8.0

0 commit comments

Comments
 (0)