|
| 1 | +import pandas as pd |
| 2 | +import hashlib |
| 3 | +from dojo.models import Finding |
| 4 | + |
| 5 | + |
class BlackduckHubCSVParser(object):
    """
    Parse a Black Duck Hub ``security.csv`` export into ``Finding`` objects.

    After construction, ``self.items`` holds one ``Finding`` per unique
    (title, vulnerability source) pair found in the file, or an empty
    tuple when no file was supplied.

    security.csv fields
    1 project id -- ignore
    2 version id -- ignore
    3 chan version id -- ignore
    4 Project name
    5 Version NO -- part of channel id
    6 channel version origin (i.e maven)
    7 Channel version origin id YES
    8 channel version origin name NO, part of ID already
    9 Vulnerability id (either a CVE or some random number from VULNDB?)
    10 Description
    11 Published on
    12 Updated on
    13 Base score
    14 Exploitability
    15 Impact
    16 Vulnerability source
    17 Remediation status (NEW, DUPLICATE...)
    18 Remediation target date
    19 Remediation actual date
    20 Remediation comment
    21 URL (can be empty)
    22 Security Risk
    """

    def __init__(self, filename, test):
        """
        Read the CSV and populate ``self.items``.

        :param filename: path or file-like object accepted by
            ``pandas.read_csv``; ``None`` yields an empty result.
        :param test: the test object every created ``Finding`` is attached to.
        """
        self.items = ()

        if filename is None:
            return

        df = pd.read_csv(filename, header=0)
        # Empty cells become the literal "N/A" so the text fields stay readable.
        df = df.fillna("N/A")

        dupes = dict()
        for i in df.index:
            title = self.format_title(df, i)
            vuln_id = df.loc[i, 'Vulnerability id']
            vuln_source = df.loc[i, 'Vulnerability source']
            dupe_key = self._dupe_key(title, vuln_source)

            if dupe_key in dupes:
                # Same title and source already seen: extend the existing
                # finding instead of creating a second one.
                finding = dupes[dupe_key]
                if finding.description:
                    finding.description += "Vulnerability ID: {}\n {}\n".format(
                        vuln_id, vuln_source)
                continue

            cwe = 0  # need a way to automatically retrieve that, see #1119
            severity = str(df.loc[i, 'Security Risk']).title()

            dupes[dupe_key] = Finding(
                title=title,
                cwe=int(cwe),
                test=test,
                active=False,
                verified=False,
                description=self.format_description(df, i),
                severity=severity,
                numerical_severity=Finding.get_numerical_severity(severity),
                mitigation=self.format_mitigation(df, i),
                impact=df.loc[i, 'Impact'],
                references=self.format_reference(df, i),
                url=df.loc[i, 'URL'],
                dynamic_finding=True)

        # Materialise the values so callers get a real list, not a dict view.
        self.items = list(dupes.values())

    @staticmethod
    def _dupe_key(title, source):
        """Return the MD5 hex digest of ``"<title>|<source>"`` (UTF-8 encoded)."""
        # md5 needs bytes; format() also copes with non-string source cells.
        return hashlib.md5("{}|{}".format(title, source).encode("utf-8")).hexdigest()

    def format_title(self, df, i):
        """Return ``"<Vulnerability id> - <Channel version origin id>"`` for row ``i``."""
        return "{} - {}".format(df.loc[i, 'Vulnerability id'],
                                df.loc[i, 'Channel version origin id'])

    def format_description(self, df, i):
        """Return the multi-line description text built from row ``i``."""
        description = "Published on: {}\n\n".format(str(df.loc[i, 'Published on']))
        description += "Updated on: {}\n\n".format(str(df.loc[i, 'Updated on']))
        description += "Base score: {}\n\n".format(str(df.loc[i, 'Base score']))
        description += "Exploitability: {}\n\n".format(str(df.loc[i, 'Exploitability']))
        description += "Description: {}\n".format(df.loc[i, 'Description'])

        return description

    def format_mitigation(self, df, i):
        """Return the remediation summary text built from row ``i``."""
        mitigation = "Remediation status: {}\n".format(df.loc[i, 'Remediation status'])
        mitigation += "Remediation target date: {}\n".format(df.loc[i, 'Remediation target date'])
        mitigation += "Remediation actual date: {}\n".format(df.loc[i, 'Remediation actual date'])
        mitigation += "Remediation comment: {}\n".format(df.loc[i, 'Remediation comment'])

        return mitigation

    def format_reference(self, df, i):
        """Return the source/URL reference text built from row ``i``."""
        reference = "Source: {}\n".format(df.loc[i, 'Vulnerability source'])
        reference += "URL: {}\n".format(df.loc[i, 'URL'])

        return reference
0 commit comments