Skip to content

Commit 71e1253

Browse files
committed
fix: [Global + Tracker term] fix warning + content of invalid text mimetype
1 parent 3715306 commit 71e1253

File tree

2 files changed

+28
-25
lines changed

2 files changed

+28
-25
lines changed

bin/lib/objects/Decodeds.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -131,9 +131,12 @@ def get_content(self, mimetype=None, r_type='str'):
131131
else:
132132
return b''
133133
if r_type == 'str':
134-
with open(filepath, 'r') as f:
135-
content = f.read()
136-
return content
134+
try:
135+
with open(filepath, 'r') as f:
136+
content = f.read()
137+
return content
138+
except UnicodeDecodeError:
139+
return ''
137140
elif r_type == 'bytes':
138141
with open(filepath, 'rb') as f:
139142
content = f.read()

bin/trackers/Tracker_Term.py

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -89,32 +89,32 @@ def compute(self, message):
8989
return None
9090

9191
content = obj.get_content()
92+
if content:
93+
signal.alarm(self.max_execution_time)
9294

93-
signal.alarm(self.max_execution_time)
95+
dict_words_freq = None
96+
try:
97+
dict_words_freq = Tracker.get_text_word_frequency(content)
98+
except TimeoutException:
99+
self.logger.warning(f"{self.obj.get_global_id()} processing timeout")
100+
else:
101+
signal.alarm(0)
94102

95-
dict_words_freq = None
96-
try:
97-
dict_words_freq = Tracker.get_text_word_frequency(content)
98-
except TimeoutException:
99-
self.logger.warning(f"{self.obj.get_global_id()} processing timeout")
100-
else:
101-
signal.alarm(0)
103+
if dict_words_freq:
102104

103-
if dict_words_freq:
104-
105-
# check solo words
106-
for word in self.tracked_words[obj_type]:
107-
if word in dict_words_freq:
108-
self.new_tracker_found(word, 'word', obj)
109-
110-
# check words set
111-
for tracked_set in self.tracked_sets[obj_type]:
112-
nb_uniq_word = 0
113-
for word in tracked_set['words']:
105+
# check solo words
106+
for word in self.tracked_words[obj_type]:
114107
if word in dict_words_freq:
115-
nb_uniq_word += 1
116-
if nb_uniq_word >= tracked_set['nb']:
117-
self.new_tracker_found(tracked_set['tracked'], 'set', obj)
108+
self.new_tracker_found(word, 'word', obj)
109+
110+
# check words set
111+
for tracked_set in self.tracked_sets[obj_type]:
112+
nb_uniq_word = 0
113+
for word in tracked_set['words']:
114+
if word in dict_words_freq:
115+
nb_uniq_word += 1
116+
if nb_uniq_word >= tracked_set['nb']:
117+
self.new_tracker_found(tracked_set['tracked'], 'set', obj)
118118

119119
def new_tracker_found(self, tracker_name, tracker_type, obj): # TODO FILTER
120120
obj_id = obj.get_id()

0 commit comments

Comments
 (0)