Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ repos:
hooks:
- id: trailing-whitespace
- id: check-added-large-files
exclude: ^config/local_ti_files/known_fp_hashes\.csv$
- id: check-docstring-first
- id: check-merge-conflict
- id: end-of-file-fixer
- id: detect-private-key
exclude: .*dataset/.*|
exclude: .*dataset/.* |
(?x)(
^config/$|
.*test.* |
Expand Down
8,220 changes: 8,220 additions & 0 deletions config/local_ti_files/known_fp_md5_hashes.csv

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions docs/detection_modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,14 @@ Example:
"6734f37431670b3ab4292b8f60f29984", "high", "Trickbot Malwar"


### Whitelisting known FP hashes

To avoid false-positive "Malicious downloaded file" detections, Slips checks whether the MD5 hash of each downloaded file is in a list of known false-positive (FP) hashes before looking the hash up online.

The list of known FP MD5 hashes is stored in `config/local_ti_files/known_fp_md5_hashes.csv`. It is taken from https://github.com/Neo23x0/ti-falsepositives/tree/master.

If the hash is in that list, Slips considers the file benign and does not look the hash up online.

### Adding your own remote feed


Expand Down
76 changes: 41 additions & 35 deletions modules/threat_intelligence/threat_intelligence.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def init(self):
self.urlhaus = URLhaus(self.db)
self.spamhaus = Spamhaus(self.db)
self.pending_queries = multiprocessing.Queue()
self.calls_thread = threading.Thread(
self.pending_circllu_calls_thread = threading.Thread(
target=self.handle_pending_queries, daemon=True
)
self.circllu = Circllu(self.db, self.pending_queries)
Expand Down Expand Up @@ -559,6 +559,23 @@ def set_evidence_malicious_domain(
def is_valid_threat_level(self, threat_level):
    """Tell whether ``threat_level`` is one of the entries of
    ``utils.threat_levels``."""
    known_levels = utils.threat_levels
    return threat_level in known_levels

def parse_known_fp_hashes(self, fullpath: str):
    """Parse the known false-positive MD5 hashes file and store it in the DB.

    Each non-comment line is expected to look like
    ``description, hash1, hash2, ...`` (fields separated by ", ").
    Every hash is mapped to the description found at the start of its line
    and the resulting dict is handed to ``self.db.store_known_fp_md5_hashes``.

    Parameters:
        fullpath (str): path of the known FP hashes file.
    """
    fp_hashes = {}
    with open(fullpath, encoding="utf-8") as fps:
        for line in fps:
            # drop the trailing newline; otherwise the last hash of every
            # line is stored as "<hash>\n" and never matches a lookup
            line = line.strip()
            # skip blank lines and comments
            if not line or line.startswith("#"):
                continue

            # first field is the description, the rest are hashes
            description, *hashes = line.split(", ")
            description = description.strip()
            for md5 in hashes:
                md5 = md5.strip()
                # ignore empty fields, e.g. caused by a trailing separator
                if md5:
                    fp_hashes[md5] = description

    self.db.store_known_fp_md5_hashes(fp_hashes)

def parse_local_ti_file(self, ti_file_path: str) -> bool:
"""Parses a local threat intelligence (TI) file to extract
and store various indicators of compromise (IoCs), including IP
Expand Down Expand Up @@ -824,34 +841,20 @@ def parse_ja3_file(self, path):
return True

def parse_jarm_file(self, path):
"""Parses a file containing JARM hashes, their associated threat levels, and
descriptions, then stores this information in the database. The file is expected
to follow a specific format where each line contains a JARM hash, its threat
level, and a descriptive text, separated by commas.
"""
Parses a file of JARM hashes with their threat levels and descriptions, then stores the data in the database.

Parameters:
path (str): The absolute path to the local file containing
JARM hashes.

path (str): Absolute path to the JARM hash file.
Returns:
bool: Always returns True to indicate the method has executed.
This behavior could be modified in the future to reflect the
success status of parsing and database storage operations.

This method processes each line of the provided file, skipping any
lines that are commented out or improperly formatted. It validates
the threat level of each JARM hash against a predefined list of
valid levels,
defaulting to 'medium' if the provided level is not recognized.
bool: Always True, indicating execution success (may change in the future).
Details:

Side Effects:
- Populates the database with new JARM hash records extracted
from the provided file. Existing records for a JARM hash are
not explicitly handled in this method, so duplicate entries
could occur if not managed elsewhere.
- Logs the progress of reading the file, including a message
indicating the start of the process and any errors related to
invalid line formats.
Processes each line, skipping comments and invalid formats.
Validates threat levels, defaulting to 'medium' if unrecognized.
Populates the database with parsed JARM hash records (duplicates are not handled).
Logs progress, including errors for invalid lines.
"""
filename = os.path.basename(path)
jarm_dict = {}
Expand Down Expand Up @@ -898,7 +901,6 @@ def parse_jarm_file(self, path):
"threat_level": threat_level,
}
)
# Add all loaded JARM to the database
self.db.add_jarm_to_IoC(jarm_dict)
return True

Expand Down Expand Up @@ -1407,6 +1409,11 @@ def is_malicious_hash(self, flow_info: dict):
# .. }
return

if self.db.is_known_fp_md5_hash():
# this is a known FP https://github.com/Neo23x0/ti-falsepositives/tree/master
# its benign so dont look it up
return

if blacklist_details := self.search_online_for_hash(flow_info):
# the md5 appeared in a blacklist
# update the blacklist_details dict with uid,
Expand Down Expand Up @@ -1706,16 +1713,14 @@ def update_local_file(self, filename):
of the TI file.
"""
fullpath = os.path.join(self.path_to_local_ti_files, filename)
parsers = {
"own_malicious_iocs.csv": self.parse_local_ti_file,
"own_malicious_JA3.csv": self.parse_ja3_file,
"own_malicious_JARM.csv": self.parse_jarm_file,
"known_fp_md5_hashes.csv": self.parse_known_fp_hashes,
}
if filehash := self.should_update_local_ti_file(fullpath):
if "JA3" in filename:
# Load updated data to the database
self.parse_ja3_file(fullpath)
elif "JARM" in filename:
# Load updated data to the database
self.parse_jarm_file(fullpath)
else:
# Load updated data to the database
self.parse_local_ti_file(fullpath)
parsers[filename](fullpath)
# Store the new etag and time of file in the database
malicious_file_info = {"hash": filehash}
self.db.set_ti_feed_info(filename, malicious_file_info)
Expand Down Expand Up @@ -1767,11 +1772,12 @@ def pre_main(self):
"own_malicious_iocs.csv",
"own_malicious_JA3.csv",
"own_malicious_JARM.csv",
"known_fp_md5_hashes.csv",
)
for local_file in local_files:
self.update_local_file(local_file)

self.calls_thread.start()
self.pending_circllu_calls_thread.start()

def main(self):
# The channel can receive an IP address or a domain name
Expand Down
6 changes: 6 additions & 0 deletions slips_files/core/database/database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ def get_ip_info(self, *args, **kwargs):
def set_new_ip(self, *args, **kwargs):
    """Forward the call, with all given arguments, to ``self.rdb.set_new_ip``
    and return its result."""
    forward = self.rdb.set_new_ip
    return forward(*args, **kwargs)

def store_known_fp_md5_hashes(self, *args, **kwargs):
    """Forward the call, with all given arguments, to
    ``self.rdb.store_known_fp_md5_hashes`` and return its result."""
    forward = self.rdb.store_known_fp_md5_hashes
    return forward(*args, **kwargs)

def is_known_fp_md5_hash(self, *args, **kwargs):
    """Forward the call, with all given arguments, to
    ``self.rdb.is_known_fp_md5_hash`` and return its result."""
    forward = self.rdb.is_known_fp_md5_hash
    return forward(*args, **kwargs)

def ask_for_ip_info(self, *args, **kwargs):
    """Forward the call, with all given arguments, to
    ``self.rdb.ask_for_ip_info`` and return its result."""
    forward = self.rdb.ask_for_ip_info
    return forward(*args, **kwargs)

Expand Down
1 change: 1 addition & 0 deletions slips_files/core/database/redis_db/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Constants:
DOMAINS_INFO = "DomainsInfo"
IPS_INFO = "IPsInfo"
PROCESSED_FLOWS = "processed_flows_so_far"
KNOWN_FPS = "known_fps"


class Channels:
Expand Down
9 changes: 9 additions & 0 deletions slips_files/core/database/redis_db/ioc_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
List,
Tuple,
Union,
Optional,
)


Expand Down Expand Up @@ -119,6 +120,14 @@ def set_ti_feed_info(self, file, data):
data = json.dumps(data)
self.rcache.hset(self.constants.TI_FILES_INFO, file, data)

def store_known_fp_md5_hashes(self, fps: Dict[str, str]):
    """Store the known false-positive MD5 hashes in the cache database.

    :param fps: mapping of MD5 hash -> description of the known FP.
        Nothing is written when the mapping is empty.
    """
    # redis-py rejects an empty mapping with a DataError, so an empty
    # (or comment-only) FP file must not reach the client
    if not fps:
        return
    self.rcache.hmset(self.constants.KNOWN_FPS, fps)

def is_known_fp_md5_hash(self, hash: str) -> Optional[str]:
    """Look up the given MD5 hash in the known false positives.

    :param hash: the MD5 hash to check.
    :return: the stored description if the hash is a known FP,
        None otherwise.
    """
    # hget returns the single field value (or None when it is missing).
    # hmget would return a one-element list such as [None], which is
    # always truthy and would flag every hash as a known FP.
    return self.rcache.hget(self.constants.KNOWN_FPS, hash)

def delete_ips_from_IoC_ips(self, ips: List[str]):
"""
Delete the given IPs from IoC
Expand Down
12 changes: 10 additions & 2 deletions tests/test_threat_intelligence.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,7 +944,7 @@ def test_pre_main(mocker):
threatintel = ModuleFactory().create_threatintel_obj()
mocker.patch.object(threatintel, "update_local_file")
threatintel.pre_main()
assert threatintel.update_local_file.call_count == 3
assert threatintel.update_local_file.call_count == 4


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1178,7 +1178,7 @@ def test_is_malicious_hash(
recording evidence of malicious file hashes.
"""
threatintel = ModuleFactory().create_threatintel_obj()

threatintel.db.is_known_fp_md5_hash.return_value = False
mock_search_online_for_hash = mocker.patch.object(
threatintel, "search_online_for_hash"
)
Expand All @@ -1197,11 +1197,19 @@ def test_is_malicious_hash(
"twid": "timewindow1",
}
mock_search_online_for_hash.return_value = search_online_result

threatintel.is_malicious_hash(flow_info)

assert threatintel.db.set_evidence.called == expected_set_evidence_call


def test_is_malicious_hash_known_fp_md5():
    """is_malicious_hash() returns None when the DB reports the
    flow's MD5 as a known false positive."""
    ti_module = ModuleFactory().create_threatintel_obj()
    # make the (mocked) DB report the hash as a known FP
    ti_module.db.is_known_fp_md5_hash.return_value = True
    flow_info = {"flow": {"md5": "c0eec84d09bbb7f4cd1a8896f9dff718"}}
    result = ti_module.is_malicious_hash(flow_info)
    assert result is None


@pytest.mark.parametrize(
"url, result, is_malicious",
[
Expand Down