@@ -38,7 +38,7 @@ def get_image_obj(obj_gid):
3838
def create_ollama_domain_data(model, descriptions):
    """Serialize the Ollama request body asking for a domain description.

    :param model: name of the Ollama model to query
    :param descriptions: image descriptions, embedded verbatim in the prompt
    :return: JSON-encoded request body (str)
    """
    prompt = f'From this list of images descriptions, Describe this domain\n\n{descriptions}'
    payload = {
        'model': model,
        'prompt': prompt,
        # ask for one complete answer instead of a token stream
        'stream': False,
    }
    return json.dumps(payload)
4444
@@ -49,6 +49,25 @@ def create_ollama_image_data(model, images):
4949 'images' : images
5050 })
5151
def create_ollama_description_csam_classification(model, description):
    """Serialize the Ollama request body for a Yes/No classification of a text description.

    :param model: name of the Ollama model to query
    :param description: image description, embedded verbatim in the prompt
    :return: JSON-encoded request body (str)
    """
    prompt = f'Does this description involve CE or CSAM? Answer "Yes" or "No".\nDescription: {description}'
    payload = {
        'model': model,
        'prompt': prompt,
        # ask for one complete answer instead of a token stream
        'stream': False,
    }
    return json.dumps(payload)
57+
def create_ollama_image_csam_classification(model, images):
    """Serialize the Ollama request body for a Yes/No classification of image content.

    :param model: name of the Ollama model to query
    :param images: list of images forwarded under the 'images' key
                   (callers in this file pass base64 strings)
    :return: JSON-encoded request body (str)
    """
    payload = {
        'model': model,
        'prompt': 'Does this image involve CE or CSAM? Answer "Yes" or "No".',
        # ask for one complete answer instead of a token stream
        'stream': False,
        'images': images,
    }
    return json.dumps(payload)
64+
def create_ollama_domain_csam_classification(model, descriptions):
    """Serialize the Ollama request body for a Yes/No classification of a domain.

    :param model: name of the Ollama model to query
    :param descriptions: domain description text, embedded verbatim in the prompt
    :return: JSON-encoded request body (str)
    """
    prompt = f'Is this website domain associated with child exploitation? Respond with only "Yes" or "No"\n\n{descriptions}'
    payload = {
        'model': model,
        'prompt': prompt,
        # ask for one complete answer instead of a token stream
        'stream': False,
    }
    return json.dumps(payload)
70+
5271# screenshot + image
5372def api_get_image_description (obj_gid ):
5473 model = get_default_image_description_model ()
@@ -72,7 +91,7 @@ def api_get_image_description(obj_gid):
7291 return {"status" : "error" , "reason" : f"ollama requests error: { e } " }, 400
7392 if res .status_code != 200 :
7493 # TODO LOG
75- return {"status" : "error" , "reason" : f" llama requests error: { res .status_code } , { res .text } " }, 400
94+ return {"status" : "error" , "reason" : f"ollama requests error: { res .status_code } , { res .text } " }, 400
7695 else :
7796 r = res .json ()
7897 if r :
@@ -117,7 +136,7 @@ def get_domain_description(domain_id, reprocess=True):
117136 return {"status" : "error" , "reason" : f"ollama requests error: { e } " }, 400
118137 if res .status_code != 200 :
119138 # TODO LOG
120- return {"status" : "error" , "reason" : f" llama requests error: { res .status_code } , { res .text } " }, 400
139+ return {"status" : "error" , "reason" : f"ollama requests error: { res .status_code } , { res .text } " }, 400
121140 else :
122141 r = res .json ()
123142 if r :
@@ -152,6 +171,98 @@ def _create_image_description():
152171 # print(f'{done}/{total} {progress}%')
153172
154173
def update_domain_description(domain, model):
    """Drop the stored description of one domain and request it again.

    :param domain: domain object exposing delete_description(), get_global_id() and get_id()
    :param model: model name whose stored description is deleted
    """
    # 1. forget the description previously stored for this model
    domain.delete_description(model)

    # 2. remove the matching entry from the 'desc-dom' search index
    global_id = domain.get_global_id()
    search_engine.remove_document('desc-dom', global_id)

    # 3. ask for the description again (return value intentionally ignored)
    domain_id = domain.get_id()
    get_domain_description(domain_id, reprocess=False)
178+
def update_domains_descriptions():
    """Refresh the description of every domain returned by the "up" iterator.

    Progress is printed after each domain as ``done/total percent%``. The total
    only counts 'onion' and 'web' domains, so it is an estimate used for
    reporting and never drives the loop itself.
    """
    # Expected number of domains, used for progress reporting only.
    nb_domains = Domains.get_nb_domains_up_by_type('onion') + Domains.get_nb_domains_up_by_type('web')
    model = get_default_image_description_model()
    for done, domain in enumerate(Domains.get_domain_up_iterator(), start=1):
        update_domain_description(domain, model)
        # The iterator may yield domains even when the onion+web count is 0;
        # avoid a ZeroDivisionError in that case.
        progress = int(done * 100 / nb_domains) if nb_domains else 0
        print(f'{done}/{nb_domains} {progress}%')
    # NOTE(review): the whole 'desc-dom' index is deleted *after* every domain
    # was re-processed -- confirm this ordering is intended.
    search_engine.delete_index('desc-dom')
189+
190+
def check_is_image_csam(obj_gid, image_description=False):
    """Ask the Ollama model whether an image is CE/CSAM related and tag it if so.

    :param obj_gid: global id of the image object
    :param image_description: when True, classify the textual description of the
        image; when False, classify the image content itself (sent as base64)
    :return: ``(is_csam, 200)`` where ``is_csam`` is True, False or None (answer
        not recognised); on failure ``({"status": "error", "reason": ...}, code)``
    """
    model = get_default_image_description_model()

    image = get_image_obj(obj_gid)
    if not image:
        return {"status": "error", "reason": "Unknown image"}, 404

    if image_description:
        # Classify the textual description of the image.
        description = api_get_image_description(obj_gid)
        if description[1] != 200:
            # Previously this path left `data` unbound and crashed with an
            # UnboundLocalError; propagate the upstream error instead.
            return description
        data = create_ollama_description_csam_classification(model, description[0])
    else:
        # Classify the image content itself.
        b64 = image.get_base64()
        if not b64:
            return {"status": "error", "reason": "No Content"}, 404
        data = create_ollama_image_csam_classification(model, [b64])

    headers = {"Connection": "close", 'Content-Type': 'application/json', 'Accept': 'application/json'}
    try:
        res = requests.post(f'{OLLAMA_URL}/api/generate', data=data, headers=headers)
    except Exception as e:
        return {"status": "error", "reason": f"ollama requests error: {e}"}, 400
    if res.status_code != 200:
        # TODO LOG
        return {"status": "error", "reason": f"ollama requests error: {res.status_code}, {res.text}"}, 400

    is_csam = None
    r = res.json()
    if r:
        response = r['response']
        # NOTE(review): this stores the Yes/No verdict through the description
        # API, as the original code did -- confirm it does not overwrite the
        # real image description.
        image.add_description_model(model, response)
        # The prompt asks for "Yes"/"No": compare case-insensitively and ignore
        # surrounding whitespace, quotes and a trailing period.
        answer = str(response).strip().strip('."\'').upper()
        if answer == 'YES':
            is_csam = True
        elif answer == 'NO':
            is_csam = False

    # TODO LOG NONE result
    if is_csam:
        image.add_tag('dark-web:topic="pornography-child-exploitation"')
        print(obj_gid, is_csam)

    return is_csam, 200
238+
def check_images_csam(image_description=False):
    """Run check_is_image_csam() on every image object.

    :param image_description: forwarded unchanged to check_is_image_csam()
    """
    for img in Images.get_all_images_objects():
        gid = img.get_global_id()
        # per-image result is not collected here
        check_is_image_csam(gid, image_description=image_description)
242+
243+
def check_if_domain_csam(domain_id):
    """Ask the Ollama model whether a domain is CE related and tag it if so.

    :param domain_id: id of the domain to classify
    :return: ``({"status": "error", "reason": ...}, code)`` on failure,
        otherwise None (the domain is tagged as a side effect on a "Yes" answer)
    """
    model = get_default_image_description_model()
    domain = Domains.Domain(domain_id)
    description = get_domain_description(domain_id)
    # get_domain_description() reports failures as (payload, status) tuples:
    # unwrap the payload and never feed an error dict to the model.
    if isinstance(description, tuple) and len(description) == 2:
        if description[1] != 200:
            return description
        description = description[0]

    headers = {"Connection": "close", 'Content-Type': 'application/json', 'Accept': 'application/json'}
    data = create_ollama_domain_csam_classification(model, description)
    try:
        res = requests.post(f'{OLLAMA_URL}/api/generate', data=data, headers=headers)
    except Exception as e:  # TODO LOG
        return {"status": "error", "reason": f"ollama requests error: {e}"}, 400
    if res.status_code != 200:
        # TODO LOG
        return {"status": "error", "reason": f"ollama requests error: {res.status_code}, {res.text}"}, 400

    r = res.json()
    if r:
        # The prompt asks for "Yes"/"No": the previous exact match on 'yes'
        # could never succeed on a capitalised answer. Compare
        # case-insensitively and ignore whitespace, quotes and a trailing period.
        answer = str(r['response']).strip().strip('."\'').lower()
        if answer == 'yes':
            print('yes')
            domain.add_tag('dark-web:topic="pornography-child-exploitation"')
263+
264+
if __name__ == '__main__':
    # Manual maintenance entry point: re-generate every domain description.
    update_domains_descriptions()
    # Alternative one-off jobs, enable as needed:
    # check_images_csam()
    # _create_image_description()
0 commit comments