Skip to content

Commit a756c2b

Browse files
committed
new: [search engine] add description indexes of domains, images and screenshots
1 parent 8345e4b commit a756c2b

File tree

4 files changed

+73
-3
lines changed

4 files changed

+73
-3
lines changed

bin/lib/objects/Domains.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,18 @@ def get_description(self, model=None):
349349

350350
## -Descriptions- ##
351351

352+
## Search ##
353+
354+
def get_search_description_document(self):
    """
    Build the search-engine document holding this domain's description.

    :return: dict with the keys 'uuid', 'id' and 'content', or None when
             get_description() returns nothing truthy
    """
    domain_gid = self.get_global_id()
    description = self.get_description()
    # Nothing to index without a description
    if not description:
        return None
    return {'uuid': self.get_uuid5(domain_gid), 'id': domain_gid, 'content': description}
361+
362+
## -Search- ##
363+
352364
# TODO FIXME
353365
def get_all_urls(self, date=False, epoch=None):
354366
if date:

bin/lib/objects/Images.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ def get_description(self, model=None):
108108
description = description.replace("`", ' ')
109109
return description
110110

111+
def get_search_document(self):
    """
    Build the search-engine document holding this image's description.

    :return: dict with the keys 'uuid', 'id' and 'content', or None when
             get_description() returns nothing truthy
    """
    image_gid = self.get_global_id()
    description = self.get_description()
    # Images without a description are not indexed
    if not description:
        return None
    return {'uuid': self.get_uuid5(image_gid), 'id': image_gid, 'content': description}
118+
111119
def get_misp_object(self):
112120
obj_attrs = []
113121
obj = MISPObject('file')

bin/lib/objects/Screenshots.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,14 @@ def get_description(self, model=None):
112112
model = get_default_image_description_model()
113113
return self._get_field(f'desc:{model}')
114114

115+
def get_search_document(self):
    """
    Build the search-engine document holding this screenshot's description.

    :return: dict with the keys 'uuid', 'id' and 'content', or None when
             get_description() returns nothing truthy
    """
    screenshot_gid = self.get_global_id()
    description = self.get_description()
    # Screenshots without a description are not indexed
    if not description:
        return None
    return {'uuid': self.get_uuid5(screenshot_gid), 'id': screenshot_gid, 'content': description}
122+
115123
def get_misp_object(self):
116124
obj_attrs = []
117125
obj = MISPObject('file')

bin/lib/search_engine.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
from lib import ail_logger
1717
from lib.ConfigLoader import ConfigLoader
1818
from lib.objects import Domains
19+
from lib.objects import Images
1920
from lib.objects import Items
2021
from lib.objects import Messages
22+
from lib.objects import Screenshots
2123
from lib import chats_viewer
2224

2325
logging.config.dictConfig(ail_logger.get_config(name='ail'))
@@ -66,7 +68,7 @@ def get_indexes(self):
6668
return self.client.get_indexes()
6769

6870
def _create_indexes(self):
69-
for index in ['cdiscord', 'ctelegram', 'cmatrix', 'tor', 'web']: # TODO dynamic load of chat uuid ?
71+
for index in ['cdiscord', 'ctelegram', 'cmatrix', 'desc:dom', 'desc:img', 'desc:screen', 'tor', 'web']: # TODO dynamic load of chat uuid ?
7072
self.client.create_index(index, {'primaryKey': 'uuid'})
7173

7274
def add(self, index, document):
@@ -96,8 +98,11 @@ def index_all():
9698
# Engine._delete('tor')
9799
# Engine._delete('web')
98100
Engine._create_indexes()
99-
index_crawled()
100-
index_chats_messages()
101+
# index_crawled()
102+
# index_chats_messages()
103+
index_images_descriptions()
104+
index_screenshots_descriptions()
105+
# index_domains_descriptions()
101106

102107
# TODO index titles
103108
def _index_crawled_domain(dom_id):
@@ -133,6 +138,43 @@ def index_chats_messages():
133138
for message in chats_viewer.get_messages_iterator():
134139
index_message(message)
135140

141+
142+
def index_image_description(image):
    """
    Add the description of one image to the 'desc:img' index.

    Nothing is sent to the engine when the image has no search document.

    :param image: object exposing get_search_document()
    """
    # NOTE(review): if Engine is backed by Meilisearch, index uids are limited to
    # alphanumerics, '-' and '_' -- confirm that the ':' in this name is accepted.
    index = 'desc:img'  # constant name: no f-string needed
    document = image.get_search_document()
    if document:
        Engine.add(index, document)
147+
148+
def index_images_descriptions():
    """Index the description of every object yielded by Images.get_all_images_objects()."""
    for img_obj in Images.get_all_images_objects():
        index_image_description(img_obj)
151+
152+
153+
def index_screenshot_description(screenshot):
    """
    Add the description of one screenshot to the 'desc:screen' index.

    Nothing is sent to the engine when the screenshot has no search document.

    :param screenshot: object exposing get_search_document()
    """
    # NOTE(review): if Engine is backed by Meilisearch, index uids are limited to
    # alphanumerics, '-' and '_' -- confirm that the ':' in this name is accepted.
    index = 'desc:screen'  # constant name: no f-string needed
    document = screenshot.get_search_document()
    if document:
        Engine.add(index, document)
158+
159+
def index_screenshots_descriptions():
    """Index the description of every object yielded by Screenshots.get_screenshots_obj_iterator()."""
    for screenshot_obj in Screenshots.get_screenshots_obj_iterator():
        index_screenshot_description(screenshot_obj)
162+
163+
164+
def index_domain_description(domain_id):
    """
    Add the description of one domain to the 'desc:dom' index.

    Nothing is sent to the engine when the domain has no search document.

    :param domain_id: identifier passed to Domains.Domain()
    """
    # NOTE(review): if Engine is backed by Meilisearch, index uids are limited to
    # alphanumerics, '-' and '_' -- confirm that the ':' in this name is accepted.
    index = 'desc:dom'  # constant name: no f-string needed
    domain = Domains.Domain(domain_id)
    document = domain.get_search_description_document()
    if document:
        Engine.add(index, document)
170+
171+
def index_domains_descriptions():
    """
    Index the descriptions of the domains returned by
    Domains.get_domains_up_by_type(), 'onion' first and then 'web'.
    """
    for domain_type in ('onion', 'web'):
        for domain_id in Domains.get_domains_up_by_type(domain_type):
            index_domain_description(domain_id)
176+
177+
136178
def log(user_id, index, to_search):
137179
logger.warning(f'{user_id} search: {index} - {to_search}')
138180

0 commit comments

Comments
 (0)