Skip to content

Commit 9f3f239

Browse files
committed
chg: [mail search] improve search perf, cache search, paginate mail domain search
1 parent e761049 commit 9f3f239

File tree

4 files changed

+187
-63
lines changed

4 files changed

+187
-63
lines changed

bin/lib/ail_core.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,26 @@ def rreplace(s, old, new, occurrence):
187187
li = s.rsplit(old, occurrence)
188188
return new.join(li)
189189

190+
def get_template_pagination(elems, total, page=1, nb=50):
    """Build the pagination dict for a page of results that is already sliced.

    Use this when the caller has fetched only the current page and knows the
    overall number of results separately.

    :param elems: elements of the current page
    :param total: total number of elements across all pages
    :param page: 1-based page number
    :param nb: maximum number of elements per page
    :return: dict with the keys 'nb_all_elem', 'list_elem', 'page',
             'nb_pages', 'nb_first_elem' and 'nb_last_elem'
    """
    # More than one page of elements was handed over: let paginate_iterator
    # do the slicing and build the dict itself.
    if len(elems) > nb:
        return paginate_iterator(elems, nb_obj=nb, page=page)

    # Integer ceiling division: avoids the float round-trip.
    nb_pages = -(-total // nb)

    dict_page = {'nb_all_elem': total}
    dict_page['list_elem'] = elems
    dict_page['page'] = page
    dict_page['nb_pages'] = nb_pages
    dict_page['nb_first_elem'] = (nb * (page - 1)) + 1
    # Never report a last index beyond the total: covers the last page,
    # an empty result set and an out-of-range page number.
    dict_page['nb_last_elem'] = min(nb * page, total)
    return dict_page
209+
190210
def paginate_iterator(iter_elems, nb_obj=50, page=1):
191211
dict_page = {'nb_all_elem': len(iter_elems)}
192212
nb_pages = dict_page['nb_all_elem'] / nb_obj

bin/lib/objects/Mails.py

Lines changed: 155 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -205,83 +205,194 @@ def is_indexed_username(username):
205205
return r_search.exists(f'm:u:{username}')
206206

207207

208-
def search_domain(s_domain=None, r_pos=False): # TODO paginate
208+
def search_domain(s_domain=None, r_pos=False, page=1, nb=500):
    """Return one page of indexed mail domains, optionally filtered.

    :param s_domain: substring the domain must contain; when empty every
                     indexed domain is listed
    :param r_pos: when True, add 'hl-start' / 'hl-end' (position of the
                  matched substring) to each result
    :param page: 1-based page number
    :param nb: number of results per page
    :return: (total, domains). Without s_domain, domains is a list of domain
             names; with s_domain, it is a dict mapping each domain to its
             metadata dict ('content', plus highlight positions if r_pos).
    """
    start = nb * (page - 1)
    stop = start + nb - 1

    if not s_domain:
        total = r_search.scard('m:domains')
        domains = []
        # Iterate incrementally instead of loading the whole set with
        # SMEMBERS, and stop as soon as the requested page is complete.
        # NOTE(review): scan order is presumably not stable if the set
        # changes between two page requests - confirm this is acceptable.
        for cursor, domain in enumerate(sscan_iterator(r_search, 'm:domains')):
            if cursor > stop:
                break
            if cursor >= start:
                domains.append(domain)
        return total, domains

    # The filter below is a plain substring test, so the highlight pattern
    # must be the escaped literal: raw user input such as '(' would raise
    # re.error, and '.' would highlight the wrong span.
    re_search = re.compile(re.escape(s_domain))

    total, results = get_cache_search_mail(s_domain=s_domain, page=page, nb=nb)
    if results is None:
        # Cache miss: scan everything once so the full match list can be
        # cached, keeping aside only the matches that fall on this page.
        results = []
        page_domains = []
        for domain in sscan_iterator(r_search, 'm:domains'):
            if s_domain in domain:
                if start <= len(results) <= stop:
                    page_domains.append(domain)
                results.append(domain)
        total = len(results)
        if results:
            cache_search_mail(results, s_domain=s_domain)
    else:
        # Cache hit: the cache already returned the requested page only.
        page_domains = results

    domains = {}
    for domain in page_domains:
        meta = {}
        if r_pos:
            res = re_search.search(domain)
            if res:
                meta['hl-start'] = res.start()
                meta['hl-end'] = res.end()
        meta['content'] = domain
        domains[domain] = meta
    return total, domains
224256

225-
def search_domain_username(domain, s_username=None, r_pos=False): # TODO paginate
257+
def search_domain_username(domain, s_username=None, r_pos=False, page=1, nb=500):
    """Return one page of mail addresses belonging to a domain.

    :param domain: domain whose usernames are listed
    :param s_username: substring the username must contain; when empty every
                       username of the domain is listed
    :param r_pos: when True, add 'hl-start' / 'hl-end' (position of the
                  matched substring inside the username) to each result
    :param page: 1-based page number
    :param nb: number of results per page
    :return: (total, objs) where objs maps each mail object id to a dict
             holding 'content' (the full address) and optional highlight
             positions
    """
    objs = {}
    set_key = f'm:d:{domain}'
    start = nb * (page - 1)
    stop = start + nb - 1

    if not s_username:
        total = r_search.scard(set_key)
        for cursor, username in enumerate(sscan_iterator(r_search, set_key)):
            if cursor > stop:
                break
            if cursor >= start:
                content = f'{username}@{domain}'
                objs[get_mail_id(content)] = {'content': content}
        return total, objs

    # Matching is a plain substring test, so highlight with the escaped
    # literal: raw user input compiled as a regex can raise re.error or
    # highlight a span that is not the matched substring.
    re_search = re.compile(re.escape(s_username))

    total, results = get_cache_search_mail(domain=domain, s_username=s_username, page=page, nb=nb)
    if results is None:
        # Cache miss: collect every match for the cache, and remember the
        # ones located on the requested page.
        results = []
        page_users = []
        for username in sscan_iterator(r_search, set_key):
            if s_username in username:
                if start <= len(results) <= stop:
                    page_users.append(username)
                results.append(username)
        total = len(results)
        if results:
            cache_search_mail(results, domain=domain, s_username=s_username)
    else:
        # Cache hit: only the requested page was read from the cache.
        page_users = results

    for username in page_users:
        content = f'{username}@{domain}'
        meta = {}
        if r_pos:
            res = re_search.search(username)
            if res:
                meta['hl-start'] = res.start()
                meta['hl-end'] = res.end()
        meta['content'] = content
        objs[get_mail_id(content)] = meta
    return total, objs
310+
247311

248-
def search_username_domain(username, s_domain=None, r_pos=False):
312+
def search_username_domain(username, s_domain=None, r_pos=False, page=1, nb=500):
    """Return one page of mail addresses sharing a username.

    :param username: username whose domains are listed
    :param s_domain: substring the domain must contain; when empty every
                     domain of the username is listed
    :param r_pos: when True, add 'hl-start' / 'hl-end' to each result; the
                  positions are relative to the full address, hence the
                  len(username) + 1 offset for the 'username@' prefix
    :param page: 1-based page number
    :param nb: number of results per page
    :return: (total, objs) where objs maps each mail object id to a dict
             holding 'content' (the full address) and optional highlight
             positions
    """
    objs = {}
    set_key = f'm:u:{username}'
    start = nb * (page - 1)
    # Inclusive upper bound: exactly nb results per page.
    stop = start + nb - 1

    if not s_domain:
        total = r_search.scard(set_key)
        for cursor, domain in enumerate(sscan_iterator(r_search, set_key)):
            if cursor > stop:
                break
            if cursor >= start:
                content = f'{username}@{domain}'
                objs[get_mail_id(content)] = {'content': content}
        return total, objs

    # Matching is a plain substring test, so highlight with the escaped
    # literal: raw user input compiled as a regex can raise re.error or
    # highlight a span that is not the matched substring.
    re_search = re.compile(re.escape(s_domain))

    # The page size must be nb (not the page number) so that cached pages
    # line up with freshly computed ones.
    total, results = get_cache_search_mail(username=username, s_domain=s_domain, page=page, nb=nb)
    if results is None:
        # Cache miss: collect every match for the cache, and remember the
        # ones located on the requested page.
        results = []
        page_domains = []
        for domain in sscan_iterator(r_search, set_key):
            if s_domain in domain:
                if start <= len(results) <= stop:
                    page_domains.append(domain)
                results.append(domain)
        total = len(results)
        if results:
            cache_search_mail(results, username=username, s_domain=s_domain)
    else:
        # Cache hit: only the requested page was read from the cache.
        page_domains = results

    offset = len(username) + 1
    for domain in page_domains:
        content = f'{username}@{domain}'
        meta = {}
        if r_pos:
            res = re_search.search(domain)
            if res:
                meta['hl-start'] = offset + res.start()
                meta['hl-end'] = offset + res.end()
        meta['content'] = content
        objs[get_mail_id(content)] = meta
    return total, objs
268365

269366

270-
def search_mail(mail=None, username=None, domain=None, s_username=None, s_domain=None, r_pos=False):
271-
if mail: # TODO
367+
def search_mail(mail=None, username=None, domain=None, s_username=None, s_domain=None, r_pos=False, page=1, nb=500):
    """Dispatch a mail search to the matching search helper.

    Precedence: exact mail, then domain, then username, then domain substring.

    :return: (1, mail_id) for an existing exact mail, otherwise the
             (total, results) tuple of the selected helper, or (None, None)
             when nothing can be searched
    """
    nothing = (None, None)

    # Exact lookup: an unknown mail falls through to the other criteria.
    if mail:
        exact = get_mail(mail)
        if exact.exists():
            return 1, exact.get_id()

    if domain:
        if not is_indexed_domain(domain):
            return nothing
        return search_domain_username(domain, s_username=s_username, r_pos=r_pos, page=page, nb=nb)

    if username:
        if not is_indexed_username(username):
            return nothing
        return search_username_domain(username, s_domain=s_domain, r_pos=r_pos, page=page, nb=nb)

    if s_domain:
        return search_domain(s_domain=s_domain, r_pos=r_pos, page=page, nb=nb)

    return nothing
382+
383+
def cache_search_mail(to_cache, username='', domain='', s_username='', s_domain=''):
    """Append search results to the cache list for these search parameters.

    The list key is built from the four search parameters, and its expiry is
    set to 600 seconds after the results are pushed.
    """
    cache_key = f'm:{username}:{domain}:{s_username}:{s_domain}'
    for entry in to_cache:
        r_cache.rpush(cache_key, entry)
    r_cache.expire(cache_key, 600)
387+
388+
def get_cache_search_mail(username='', domain='', s_username='', s_domain='', page=1, nb=500):
    """Read one page of cached search results.

    :return: (total, page_results), or (None, None) when the cache list for
             these search parameters is empty or missing
    """
    cache_key = f'm:{username}:{domain}:{s_username}:{s_domain}'
    total = r_cache.llen(cache_key)
    if not total:
        return None, None

    # LRANGE bounds are inclusive, hence the -1 on the upper bound.
    first = nb * (page - 1)
    last = first + nb - 1
    return total, r_cache.lrange(cache_key, first, last)
285396

286397

287398
class Mails(AbstractDaterangeObjects):

var/www/blueprints/objects_mail.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
##################################
2020
# Import Project packages
2121
##################################
22-
from lib.ail_core import paginate_iterator
22+
from lib.ail_core import paginate_iterator, get_template_pagination
2323
from lib.objects import Mails
2424
from packages import Date
2525
from lib import search_engine
@@ -121,9 +121,9 @@ def objects_mail_search():
121121
username = in_username.lower()
122122
if in_domain:
123123
s_domain = in_domain.lower()
124-
search_engine.log(user_id, 'mail', f'{mode}, {s_username} @ {s_domain}')
124+
search_engine.log(user_id, 'mail', f'{mode}, {username} @ {s_domain}')
125125
else:
126-
search_engine.log(user_id, 'mail', f'{mode}, {s_username} @')
126+
search_engine.log(user_id, 'mail', f'{mode}, {username} @')
127127
elif mode == 'exact':
128128
mail = f'{in_username.lower()}@{in_domain.lower()}'
129129
search_engine.log(user_id, 'mail', f'{mode}, {mail}')
@@ -136,23 +136,15 @@ def objects_mail_search():
136136
else:
137137
return create_json_response({'error': 'Invalid search mode domain_search, domain to search not provided'}, 400)
138138

139-
if username and domain:
140-
search_engine.log(user_id, 'mail', f'{mode}, {username} @ {domain}')
141-
elif username:
142-
search_engine.log(user_id, 'mail', f'{mode}, {username} @ {s_domain}')
143-
elif domain:
144-
search_engine.log(user_id, 'mail', f'{mode} {s_username} @ {domain}')
145-
else:
146-
search_engine.log(user_id, 'mail', f'{mode}, {username} @ {domain}')
147-
search_result = Mails.search_mail(mail=mail, username=username, domain=domain, s_username=s_username, s_domain=s_domain, r_pos=True)
139+
total, search_result = Mails.search_mail(mail=mail, username=username, domain=domain, s_username=s_username, s_domain=s_domain, r_pos=True, page=page, nb=500)
148140

149141
if isinstance(search_result, str):
150142
return redirect(url_for('correlation.show_correlation', type='mail', id=search_result))
151143
else:
152144
if search_result:
153145
mails = Mails.Mails()
154-
ids = sorted(search_result.keys())
155-
dict_page = paginate_iterator(ids, nb_obj=500, page=page)
146+
# ids = sorted(search_result.keys())
147+
dict_page = get_template_pagination(search_result, total, nb=500, page=page)
156148
if mode == 'domain_search':
157149
dict_objects = mails.get_domain_meta(dict_page['list_elem'])
158150
else:
@@ -167,5 +159,6 @@ def objects_mail_search():
167159
domain = s_domain
168160

169161
return render_template("search_mail_result.html", dict_objects=dict_objects, search_result=search_result,
170-
dict_page=dict_page, mode=mode, username=username, domain=domain)
162+
dict_page=dict_page, mode=mode, username=username, domain=domain,
163+
object_name='mails')
171164

var/www/templates/objects/mail/search_mail_result.html

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
<table id="table_objects" class="table table-striped table-bordered">
7171
<thead class="bg-dark text-white">
7272
<tr>
73-
<th></th>
73+
<th>Email</th>
7474
<th>First Seen</th>
7575
<th>Last Seen</th>
7676
<th>Total</th>
@@ -88,8 +88,8 @@
8888
{% endif %}
8989
</a>
9090
</td>
91-
<td>{{ dict_objects[obj_id]['first_seen'] }}</td>
92-
<td>{{ dict_objects[obj_id]['last_seen'] }}</td>
91+
<td>{% if dict_objects[obj_id]['first_seen'] %}{{ dict_objects[obj_id]['first_seen'][0:4] }}-{{ dict_objects[obj_id]['first_seen'][4:6] }}-{{ dict_objects[obj_id]['first_seen'][6:8] }}{% endif %}</td>
92+
<td>{% if dict_objects[obj_id]['last_seen'] %}{{ dict_objects[obj_id]['last_seen'][0:4] }}-{{ dict_objects[obj_id]['last_seen'][4:6] }}-{{ dict_objects[obj_id]['last_seen'][6:8] }}{% endif %}</td>
9393
<td>{{ dict_objects[obj_id]['nb_seen'] }}</td>
9494
</tr>
9595
{% endfor %}
@@ -130,7 +130,7 @@ <h3 class="text-center">No Results Found</h3>
130130
$('#table_objects').DataTable({
131131
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
132132
"iDisplayLength": 10,
133-
"order": [[ 3, "desc" ]]
133+
"order": [[ 0, "asc" ]]
134134
});
135135
});
136136

0 commit comments

Comments
 (0)