Skip to content

Commit bed3b8e

Browse files
authored
Merge pull request #732 from TheHive-Project/feature/mispwarninglist_db
Postgres as backend for mispwarninglist
2 parents 3c570b9 + 9f91a7f commit bed3b8e

File tree

6 files changed

+358
-54
lines changed

6 files changed

+358
-54
lines changed

analyzers/MISPWarningLists/MISPWarningLists.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"author": "Nils Kuhnert, CERT-Bund",
44
"license": "AGPL-V3",
55
"url": "https://github.com/BSI-CERT-Bund/misp-warninglists-analyzer",
6-
"version": "1.0",
6+
"version": "2.0",
77
"description": "Check IoCs/Observables against MISP Warninglists to filter false positives.",
88
"dataTypeList": ["ip", "hash", "domain", "fqdn", "url"],
99
"baseConfig": "MISPWarningLists",
@@ -14,7 +14,14 @@
1414
"description": "path to Warninglists folder",
1515
"type": "string",
1616
"multi": false,
17-
"required": true
17+
"required": false
18+
},
19+
{
20+
"name": "conn",
21+
"description": "sqlalchemy connection string",
22+
"multi": false,
23+
"required": false,
24+
"type": "string"
1825
}
1926
]
2027
}

analyzers/MISPWarningLists/mispwarninglists.py

Lines changed: 136 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99
from glob import glob
1010
from os.path import exists
1111

12+
try:
13+
import sqlalchemy as db
14+
from tld import get_tld
15+
16+
USE_DB = True
17+
except ImportError:
18+
USE_DB = False
19+
1220

1321
class MISPWarninglistsAnalyzer(Analyzer):
1422
"""
@@ -22,92 +30,171 @@ class MISPWarninglistsAnalyzer(Analyzer):
2230
}
2331
```
2432
"""
33+
2534
def __init__(self):
    """Read the job configuration and select the lookup backend.

    Two backends are supported:

    * database mode -- used when the optional ``sqlalchemy``/``tld``
      imports succeeded (``USE_DB``) and a ``config.conn`` connection
      string is configured;
    * JSON mode -- used otherwise; the warninglists are read from the
      folder given by ``config.path`` (default ``misp-warninglists``).

    ``self.error`` is called when neither a database engine nor an
    existing warninglists folder is available.
    """
    Analyzer.__init__(self)

    self.data = self.get_data()
    self.path = self.get_param("config.path", "misp-warninglists")
    conn = self.get_param("config.conn", None)

    # Only build an engine when the optional DB dependencies imported
    # successfully AND a connection string was actually configured.
    self.engine = db.create_engine(conn) if conn and USE_DB else None

    # Decide on the JSON fallback from the engine, not from USE_DB alone:
    # having sqlalchemy installed without a configured connection must
    # still load the JSON lists, otherwise run() iterates over None.
    self.warninglists = None
    if self.engine is None:
        if not exists(self.path):
            self.error("wrong configuration settings.")
        self.warninglists = self.readwarninglists()
3344

3445
def readwarninglists(self):
    """Load every warninglist JSON file found below ``self.path``.

    Files matching ``<path>/lists/*/*.json`` are parsed, and the entries of
    each file's ``list`` key are passed through
    ``Extractor().check_iterable``. The ``data`` and ``dataType`` items of
    each returned entry are collected.

    :return: a list of dicts with the keys ``name``, ``values`` and
             ``dataTypes`` (one dict per file)
    """
    listcontent = []
    for list_path in glob("{}/lists/*/*.json".format(self.path)):
        with io.open(list_path, "r") as handle:
            content = json.load(handle)

        # A fresh Extractor is created per file, as in the original code.
        values = Extractor().check_iterable(content.get("list", []))
        listcontent.append(
            {
                "name": content.get("name", "Unknown"),
                "values": [entry["data"] for entry in values],
                "dataTypes": [entry["dataType"] for entry in values],
            }
        )
    return listcontent
4859

4960
def lastlocalcommit(self):
    """Return the commit hash stored for the local ``master`` branch.

    The hash is read from ``<path>/.git/refs/heads/master`` with
    surrounding newlines removed.

    :return: the hash string, or a string starting with ``Error:`` when
             the ref file could not be read
    """
    try:
        ref_file = "{}/.git/refs/heads/master".format(self.path)
        with io.open(ref_file, "r") as handle:
            commit_hash = handle.read().strip("\n")
    except Exception as err:
        # Best effort: the caller only compares the value with the remote hash.
        return "Error: could not get local commit hash ({}).".format(err)
    return commit_hash
5566

5667
@staticmethod
def lastremotecommit():
    """Return the latest commit hash of the upstream ``master`` branch.

    The hash is fetched from the GitHub API for ``misp/misp-warninglists``.

    :return: the commit SHA, or a string starting with ``Error:`` when the
             request fails, times out or the response lacks the expected
             ``commit``/``sha`` keys
    """
    url = "https://api.github.com/repos/misp/misp-warninglists/branches/master"
    try:
        # requests never times out on its own; without an explicit timeout
        # a stalled connection would hang the analyzer indefinitely.
        result_dict = requests.get(url, timeout=10).json()
        return result_dict["commit"]["sha"]
    except Exception as e:
        # Best effort: the caller only compares the value with the local hash.
        return "Error: could not get remote commit hash ({}).".format(e)
6475

6576
def run(self):
    """Check the observable against the warninglists and report the hits.

    For the ``ip`` data type the observable is first parsed with
    ``ipaddress.ip_address``; ``self.error`` is called when that fails.

    * JSON mode (no engine): every loaded warninglist that declares the
      observable's data type is searched. IPs are matched against the
      list's networks, other values by lower-cased membership. The report
      contains ``mode: "json"`` and whether the local and remote commit
      hashes are equal.
    * DB mode (engine configured): a single parameterized SELECT is run
      against the ``warninglists`` table and every returned row becomes a
      dict with ``list_name``, ``list_version`` and ``value``. The report
      contains ``mode: "db"``.
    """
    data = self.data

    if self.data_type == "ip":
        try:
            data = ipaddress.ip_address(self.data)
        except ValueError:
            return self.error(
                "{} is said to be an IP address but it isn't".format(self.data)
            )

    if not self.engine:
        # The lists may not have been loaded at construction time (e.g. the
        # DB libraries are installed but no connection is configured).
        if self.warninglists is None:
            self.warninglists = self.readwarninglists()

        results = []
        for warninglist in self.warninglists:
            if self.data_type not in warninglist.get("dataTypes"):
                continue

            values = warninglist.get("values", [])
            if self.data_type == "ip":
                for net in values:
                    try:
                        if data in ipaddress.ip_network(net):
                            results.append({"name": warninglist.get("name")})
                            break
                    except ValueError:
                        # Entries that are not valid IP networks are
                        # skipped; only networks can contain an address.
                        continue
            elif data.lower() in values:
                results.append({"name": warninglist.get("name")})

        self.report(
            {
                "results": results,
                "mode": "json",
                "is_uptodate": self.lastlocalcommit()
                == self.lastremotecommit(),
            }
        )
        return None

    query = self._build_query(data)
    if query is None:
        return self.error("{} is not a valid url/domain/fqdn".format(self.data))
    sql, params = query

    # Fetch all rows while the connection is still open, and only then
    # release the pool. rowcount is not consulted: it is not dependable
    # for SELECT statements.
    with self.engine.connect() as connection:
        rows = connection.execute(db.text(sql), params).fetchall()
    self.engine.dispose()

    columns = ("list_name", "list_version", "value")
    results = [dict(zip(columns, row)) for row in rows]
    self.report({"results": results, "mode": "db", "is_uptodate": "N/A"})


def _build_query(self, data):
    """Build the parameterized lookup statement for the DB backend.

    Observable-derived values are never interpolated into the SQL text;
    they are passed as bound parameters (``:name`` placeholders).

    :param data: the observable (an ``ipaddress`` object for the ``ip``
                 data type, otherwise the raw string)
    :return: ``(sql, params)``, or ``None`` when a url/domain/fqdn
             observable has no domain or no TLD
    """
    select = "SELECT list_name, list_version, {} as value FROM warninglists WHERE {}"

    if self.data_type == "ip":
        # "inet '<literal>'" only accepts literals, so the bound value is
        # cast explicitly instead.
        return (
            select.format("address", "address >>= CAST(:ip AS inet)"),
            {"ip": str(data)},
        )

    if self.data_type == "hash":
        return select.format("hash", "hash = :hash"), {"hash": data}

    ext = get_tld(data, fix_protocol=True, as_object=True)
    subdomain = ext.subdomain if ext.subdomain != "" else None
    domain = ext.domain
    tld = ext.tld
    # Third element of parsed_url; it is compared with the "query" column.
    # NOTE(review): presumably the URL path component -- confirm.
    query = ext.parsed_url[2] if ext.parsed_url[2] != "" else None

    if not domain or not tld:
        return None

    params = {"domain": domain, "tld": tld}
    full_name = "concat(subdomain, '.', domain, '.', tld)"

    if query:
        params["query"] = query
        if subdomain and subdomain != "*":
            params["subdomain"] = subdomain
            value = "concat(subdomain, '.', domain, '.', tld, query)"
            where = (
                "subdomain = :subdomain and domain = :domain"
                " and tld = :tld and query = :query"
            )
        else:
            value = "concat(domain, '.', tld, query)"
            where = "domain = :domain and tld = :tld and query = :query"
    elif not subdomain:
        value = "concat(domain, '.', tld)"
        where = "subdomain is null and domain = :domain and tld = :tld"
    elif subdomain == "*":
        value = full_name
        where = "subdomain is not null and domain = :domain and tld = :tld"
    else:
        params["subdomain"] = subdomain
        value = full_name
        where = (
            "(subdomain = :subdomain or subdomain = '*')"
            " and domain = :domain and tld = :tld"
        )

    return select.format(value, where), params
96177

97178
def summary(self, raw):
    """Build the short-report taxonomies for a raw report.

    One taxonomy states whether any warninglist matched. A second
    ``Outdated`` taxonomy is added only for JSON-mode reports whose
    ``is_uptodate`` value is missing or falsy.

    :param raw: the dict passed to ``self.report`` by ``run``
    :return: ``{"taxonomies": [...]}``
    """
    hit_count = len(raw["results"])
    if hit_count:
        level, value = "suspicious", "Potential fp"
    else:
        level, value = "info", "No hits"
    taxonomies = [self.build_taxonomy(level, "MISP", "Warninglists", value)]

    # Freshness is only meaningful for the JSON backend; DB reports carry
    # "N/A" and must not be flagged.
    json_mode = raw.get("mode", None) == "json"
    if json_mode and not raw.get("is_uptodate", False):
        taxonomies.append(
            self.build_taxonomy("info", "MISP", "Warninglists", "Outdated")
        )

    return {"taxonomies": taxonomies}
110197

111198

112-
if __name__ == '__main__':
199+
# Script entry point: build the analyzer and run it once.
if __name__ == "__main__":
    analyzer = MISPWarninglistsAnalyzer()
    analyzer.run()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
cortexutils
22
requests
33
ipaddress
4+
tld
5+
sqlalchemy
6+
psycopg2-binary

0 commit comments

Comments
 (0)