|
32 | 32 |
|
33 | 33 | import logging |
34 | 34 | logger = logging.getLogger(__name__) |
| 35 | +deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication") |
35 | 36 |
|
36 | 37 |
|
37 | 38 | """ |
@@ -76,61 +77,92 @@ def is_deduplication_on_engagement_mismatch(new_finding, to_duplicate_finding): |
76 | 77 |
|
77 | 78 |
|
78 | 79 | def sync_dedupe(new_finding, *args, **kwargs): |
79 | | - logger.debug('sync_dedupe for: ' + str(new_finding.id) + |
| 80 | + deduplicationLogger.debug('sync_dedupe for: ' + str(new_finding.id) + |
80 | 81 | ":" + str(new_finding.title)) |
| 82 | + # --------------------------------------------------------- |
| 83 | + # 1) Collects all the findings that have the same: |
| 84 | + # (title and static_finding and dynamic_finding) |
| 85 | + # or (CWE and static_finding and dynamic_finding) |
| 86 | + # as the new one |
| 87 | + # (this is "cond1") |
| 88 | + # --------------------------------------------------------- |
81 | 89 | if new_finding.test.engagement.deduplication_on_engagement: |
82 | 90 | eng_findings_cwe = Finding.objects.filter( |
83 | 91 | test__engagement=new_finding.test.engagement, |
84 | 92 | cwe=new_finding.cwe, |
85 | 93 | static_finding=new_finding.static_finding, |
86 | | - dynamic_finding=new_finding.dynamic_finding, |
87 | | - date__lte=new_finding.date).exclude(id=new_finding.id).exclude( |
88 | | - cwe=0).exclude(duplicate=True) |
| 94 | + dynamic_finding=new_finding.dynamic_finding |
| 95 | + ).exclude(id=new_finding.id |
| 96 | + ).exclude(cwe=0 |
| 97 | + ).exclude(duplicate=True) |
89 | 98 | eng_findings_title = Finding.objects.filter( |
90 | 99 | test__engagement=new_finding.test.engagement, |
91 | 100 | title=new_finding.title, |
92 | 101 | static_finding=new_finding.static_finding, |
93 | | - dynamic_finding=new_finding.dynamic_finding, |
94 | | - date__lte=new_finding.date).exclude(id=new_finding.id).exclude( |
95 | | - duplicate=True) |
| 102 | + dynamic_finding=new_finding.dynamic_finding |
| 103 | + ).exclude(id=new_finding.id |
| 104 | + ).exclude(duplicate=True) |
96 | 105 | else: |
97 | 106 | eng_findings_cwe = Finding.objects.filter( |
98 | 107 | test__engagement__product=new_finding.test.engagement.product, |
99 | 108 | cwe=new_finding.cwe, |
100 | 109 | static_finding=new_finding.static_finding, |
101 | | - dynamic_finding=new_finding.dynamic_finding, |
102 | | - date__lte=new_finding.date).exclude(id=new_finding.id).exclude( |
103 | | - cwe=0).exclude(duplicate=True) |
| 110 | + dynamic_finding=new_finding.dynamic_finding |
| 111 | + ).exclude(id=new_finding.id |
| 112 | + ).exclude(cwe=0 |
| 113 | + ).exclude(duplicate=True) |
104 | 114 | eng_findings_title = Finding.objects.filter( |
105 | 115 | test__engagement__product=new_finding.test.engagement.product, |
106 | 116 | title=new_finding.title, |
107 | 117 | static_finding=new_finding.static_finding, |
108 | | - dynamic_finding=new_finding.dynamic_finding, |
109 | | - date__lte=new_finding.date).exclude(id=new_finding.id).exclude( |
110 | | - duplicate=True) |
| 118 | + dynamic_finding=new_finding.dynamic_finding |
| 119 | + ).exclude(id=new_finding.id |
| 120 | + ).exclude(duplicate=True) |
111 | 121 |
|
112 | 122 | total_findings = eng_findings_cwe | eng_findings_title |
| 123 | + deduplicationLogger.debug("Found " + |
| 124 | + str(len(eng_findings_cwe)) + " findings with same cwe, " + |
| 125 | + str(len(eng_findings_title)) + " findings with same title: " + |
| 126 | + str(len(total_findings)) + " findings with either same title or same cwe") |
113 | 127 | # total_findings = total_findings.order_by('date') |
114 | 128 |
|
115 | 129 | for find in total_findings: |
116 | 130 | flag_endpoints = False |
117 | 131 | flag_line_path = False |
118 | 132 | flag_hash = False |
119 | 133 | if is_deduplication_on_engagement_mismatch(new_finding, find): |
120 | | - logger.debug( |
| 134 | + deduplicationLogger.debug( |
121 | 135 | 'deduplication_on_engagement_mismatch, skipping dedupe.') |
122 | 136 | continue |
| 137 | + # --------------------------------------------------------- |
| 138 | + # 2) If existing and new findings have endpoints: compare them all |
| 139 | + # Else look at line+file_path |
| 140 | + # (if new finding is not static, do not deduplicate) |
| 141 | + # --------------------------------------------------------- |
123 | 142 | if find.endpoints.count() != 0 and new_finding.endpoints.count() != 0: |
124 | 143 | list1 = new_finding.endpoints.all() |
125 | 144 | list2 = find.endpoints.all() |
126 | 145 | if all(x in list1 for x in list2): |
127 | 146 | flag_endpoints = True |
128 | | - elif find.line == new_finding.line and find.file_path == new_finding.file_path and new_finding.static_finding and len( |
129 | | - new_finding.file_path) > 0: |
130 | | - flag_line_path = True |
| 147 | + elif new_finding.static_finding and len(new_finding.file_path) > 0: |
| 148 | + if(str(find.line) == new_finding.line and find.file_path == new_finding.file_path): |
| 149 | + flag_line_path = True |
| 150 | + else: |
| 151 | + deduplicationLogger.debug("no endpoints on one of the findings and file_path doesn't match") |
| 152 | + else: |
| 153 | + deduplicationLogger.debug("no endpoints on one of the findings and the new finding is either dynamic or doesn't have a file_path; Deduplication will not occur") |
131 | 154 | if find.hash_code == new_finding.hash_code: |
132 | 155 | flag_hash = True |
| 156 | + deduplicationLogger.debug( |
| 157 | + 'deduplication flags for new finding ' + str(new_finding.id) + ' and existing finding ' + str(find.id) + |
| 158 | + ' flag_endpoints: ' + str(flag_endpoints) + ' flag_line_path:' + str(flag_line_path) + ' flag_hash:' + str(flag_hash)) |
| 159 | + # --------------------------------------------------------- |
| 160 | + # 3) Findings are duplicate if (cond1 is true) and they have the same: |
| 161 | + # hash |
| 162 | + # and (endpoints or (line and file_path)) |
| 163 | + # --------------------------------------------------------- |
133 | 164 | if ((flag_endpoints or flag_line_path) and flag_hash): |
| 165 | + deduplicationLogger.debug('New finding ' + str(new_finding.id) + ' is a duplicate of existing finding ' + str(find.id)) |
134 | 166 | new_finding.duplicate = True |
135 | 167 | new_finding.active = False |
136 | 168 | new_finding.verified = False |
|
0 commit comments