Skip to content

Commit 24b6234

Browse files
authored
Merge pull request #1202 from ptrovatelli/fix-1176-checkmarx-deduplication
Fix 1176 checkmarx deduplication
2 parents c473321 + 50aac13 commit 24b6234

File tree

4 files changed

+59
-20
lines changed

4 files changed

+59
-20
lines changed

dojo/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1551,11 +1551,11 @@ def is_valid(self, bundle, request=None):
15511551
if 'test' not in bundle.data:
15521552
errors.setdefault('test', []).append('test must be given')
15531553
else:
1554-
# verify the engagement is valid
1554+
# verify the test is valid
15551555
try:
15561556
get_pk_from_uri(uri=bundle.data['test'])
15571557
except NotFound:
1558-
errors.setdefault('engagement', []).append('A valid engagement must be supplied. Ex. /api/v1/engagements/1/')
1558+
errors.setdefault('test', []).append('A valid test must be supplied. Ex. /api/v1/tests/1/')
15591559
scan_type_list = list(map(lambda x: x[0], ImportScanForm.SCAN_TYPE_CHOICES))
15601560
if 'scan_type' in bundle.data:
15611561
if bundle.data['scan_type'] not in scan_type_list:

dojo/settings/settings.dist.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,12 @@ def generate_url(scheme, double_slashes, user, password, host, port, path):
556556
'handlers': ['console'],
557557
'level': 'DEBUG',
558558
'propagate': False,
559+
},
560+
# Can be very verbose when many findings exist
561+
'dojo.specific-loggers.deduplication': {
562+
'handlers': ['console'],
563+
'level': 'INFO',
564+
'propagate': False,
559565
}
560566
}
561567
}

dojo/tasks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
logging.basicConfig(format=fmt, level=lvl)
2828

2929
logger = get_task_logger(__name__)
30+
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
3031

3132

3233
# Logs the error to the alerts table, which appears in the notification toolbar
@@ -261,7 +262,7 @@ def add_comment_task(find, note):
261262

262263
@app.task(name='async_dedupe')
263264
def async_dedupe(new_finding, *args, **kwargs):
264-
logger.info("running deduplication")
265+
deduplicationLogger.debug("running deduplication")
265266
sync_dedupe(new_finding, *args, **kwargs)
266267

267268

dojo/utils.py

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
import logging
3434
logger = logging.getLogger(__name__)
35+
deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication")
3536

3637

3738
"""
@@ -76,61 +77,92 @@ def is_deduplication_on_engagement_mismatch(new_finding, to_duplicate_finding):
7677

7778

7879
def sync_dedupe(new_finding, *args, **kwargs):
79-
logger.debug('sync_dedupe for: ' + str(new_finding.id) +
80+
deduplicationLogger.debug('sync_dedupe for: ' + str(new_finding.id) +
8081
":" + str(new_finding.title))
82+
# ---------------------------------------------------------
83+
# 1) Collects all the findings that have the same:
84+
# (title and static_finding and dynamic_finding)
85+
# or (CWE and static_finding and dynamic_finding)
86+
# as the new one
87+
# (this is "cond1")
88+
# ---------------------------------------------------------
8189
if new_finding.test.engagement.deduplication_on_engagement:
8290
eng_findings_cwe = Finding.objects.filter(
8391
test__engagement=new_finding.test.engagement,
8492
cwe=new_finding.cwe,
8593
static_finding=new_finding.static_finding,
86-
dynamic_finding=new_finding.dynamic_finding,
87-
date__lte=new_finding.date).exclude(id=new_finding.id).exclude(
88-
cwe=0).exclude(duplicate=True)
94+
dynamic_finding=new_finding.dynamic_finding
95+
).exclude(id=new_finding.id
96+
).exclude(cwe=0
97+
).exclude(duplicate=True)
8998
eng_findings_title = Finding.objects.filter(
9099
test__engagement=new_finding.test.engagement,
91100
title=new_finding.title,
92101
static_finding=new_finding.static_finding,
93-
dynamic_finding=new_finding.dynamic_finding,
94-
date__lte=new_finding.date).exclude(id=new_finding.id).exclude(
95-
duplicate=True)
102+
dynamic_finding=new_finding.dynamic_finding
103+
).exclude(id=new_finding.id
104+
).exclude(duplicate=True)
96105
else:
97106
eng_findings_cwe = Finding.objects.filter(
98107
test__engagement__product=new_finding.test.engagement.product,
99108
cwe=new_finding.cwe,
100109
static_finding=new_finding.static_finding,
101-
dynamic_finding=new_finding.dynamic_finding,
102-
date__lte=new_finding.date).exclude(id=new_finding.id).exclude(
103-
cwe=0).exclude(duplicate=True)
110+
dynamic_finding=new_finding.dynamic_finding
111+
).exclude(id=new_finding.id
112+
).exclude(cwe=0
113+
).exclude(duplicate=True)
104114
eng_findings_title = Finding.objects.filter(
105115
test__engagement__product=new_finding.test.engagement.product,
106116
title=new_finding.title,
107117
static_finding=new_finding.static_finding,
108-
dynamic_finding=new_finding.dynamic_finding,
109-
date__lte=new_finding.date).exclude(id=new_finding.id).exclude(
110-
duplicate=True)
118+
dynamic_finding=new_finding.dynamic_finding
119+
).exclude(id=new_finding.id
120+
).exclude(duplicate=True)
111121

112122
total_findings = eng_findings_cwe | eng_findings_title
123+
deduplicationLogger.debug("Found " +
124+
str(len(eng_findings_cwe)) + " findings with same cwe, " +
125+
str(len(eng_findings_title)) + " findings with same title: " +
126+
str(len(total_findings)) + " findings with either same title or same cwe")
113127
# total_findings = total_findings.order_by('date')
114128

115129
for find in total_findings:
116130
flag_endpoints = False
117131
flag_line_path = False
118132
flag_hash = False
119133
if is_deduplication_on_engagement_mismatch(new_finding, find):
120-
logger.debug(
134+
deduplicationLogger.debug(
121135
'deduplication_on_engagement_mismatch, skipping dedupe.')
122136
continue
137+
# ---------------------------------------------------------
138+
# 2) If existing and new findings have endpoints: compare them all
139+
# Else look at line+file_path
140+
# (if new finding is not static, do not deduplicate)
141+
# ---------------------------------------------------------
123142
if find.endpoints.count() != 0 and new_finding.endpoints.count() != 0:
124143
list1 = new_finding.endpoints.all()
125144
list2 = find.endpoints.all()
126145
if all(x in list1 for x in list2):
127146
flag_endpoints = True
128-
elif find.line == new_finding.line and find.file_path == new_finding.file_path and new_finding.static_finding and len(
129-
new_finding.file_path) > 0:
130-
flag_line_path = True
147+
elif new_finding.static_finding and len(new_finding.file_path) > 0:
148+
if(str(find.line) == new_finding.line and find.file_path == new_finding.file_path):
149+
flag_line_path = True
150+
else:
151+
deduplicationLogger.debug("no endpoints on one of the findings and file_path doesn't match")
152+
else:
153+
deduplicationLogger.debug("no endpoints on one of the findings and the new finding is either dynamic or doesn't have a file_path; Deduplication will not occur")
131154
if find.hash_code == new_finding.hash_code:
132155
flag_hash = True
156+
deduplicationLogger.debug(
157+
'deduplication flags for new finding ' + str(new_finding.id) + ' and existing finding ' + str(find.id) +
158+
' flag_endpoints: ' + str(flag_endpoints) + ' flag_line_path:' + str(flag_line_path) + ' flag_hash:' + str(flag_hash))
159+
# ---------------------------------------------------------
160+
# 3) Findings are duplicate if (cond1 is true) and they have the same:
161+
# hash
162+
# and (endpoints or (line and file_path))
163+
# ---------------------------------------------------------
133164
if ((flag_endpoints or flag_line_path) and flag_hash):
165+
deduplicationLogger.debug('New finding ' + str(new_finding.id) + ' is a duplicate of existing finding ' + str(find.id))
134166
new_finding.duplicate = True
135167
new_finding.active = False
136168
new_finding.verified = False

0 commit comments

Comments
 (0)