@@ -302,7 +302,6 @@ async def _initialize_categories(self) -> None:
302302 self ._categories_ready = True
303303 return
304304 cat_texts = [self ._category_embedding_text (cfg ) for cfg in self .category_configs ]
305- print (cat_texts )
306305 cat_vecs = await self .openai .embed (cat_texts )
307306 self ._category_ids = []
308307 self ._category_name_to_id = {}
@@ -340,9 +339,9 @@ async def _preprocess_resource_url(
340339 Preprocess resource based on modality.
341340
342341 General preprocessing dispatcher for all modalities:
343- - Text-based modalities (conversation, document): use text content
344- - Audio modality: transcribe first, then process
345- - Media modalities (video, image): use local_path to file
342+ - Text-based modalities (conversation, document): require text content
343+ - Audio modality: transcribe audio file first, then process as text
344+ - Media modalities (video, image): process media files directly
346345
347346 Args:
348347 local_path: Local file path to the resource
@@ -383,8 +382,8 @@ async def _preprocess_resource_url(
383382 logger .warning (f"Unknown audio file type: { file_ext } , skipping transcription" )
384383 return None , None , None
385384
386- # For text-based modalities, text is required
387- if modality in ("conversation" , "document" , "audio" ):
385+ # For pure text-based modalities (not audio), text is required
386+ if modality in ("conversation" , "document" ):
388387 if not text :
389388 return text , None , None
390389
0 commit comments