88
# Optional dependency guard: try to import the concrete engine and record the
# outcome in two module-level names that the rest of the module consults.
#   SEMANTIC_SEARCH_ENGINE_AVAILABLE -- True only when the import succeeded.
#   SearchEngineClass                -- the engine class, or None when missing.
try:
    from ..semantic_search import SemanticSearchEngine

    SEMANTIC_SEARCH_ENGINE_AVAILABLE = True
    SearchEngineClass: type[Any] | None = SemanticSearchEngine
except ImportError:
    # Importing this module must not fail just because the engine is missing;
    # log once and fall back to the "unavailable" state.
    logging.getLogger(__name__).warning(
        "SemanticSearchEngine not found. Semantic search capabilities will be limited."
    )
    SEMANTIC_SEARCH_ENGINE_AVAILABLE = False
    SearchEngineClass = None
@@ -26,26 +26,33 @@ class SemanticSearch:
2626
def __init__(self, knowledge_dir: str = ".uckn/knowledge"):
    """Set up the wrapper and, when possible, the underlying search engine.

    Args:
        knowledge_dir: Location of the knowledge store. It is kept on the
            instance as a ``Path`` and passed unchanged to the engine
            constructor.

    The engine is only instantiated when the module-level flag
    ``SEMANTIC_SEARCH_ENGINE_AVAILABLE`` is true and ``SearchEngineClass`` is
    set; otherwise ``self.engine`` stays ``None`` and a warning is logged.
    """
    from pathlib import Path

    self._logger = logging.getLogger(__name__)
    self.knowledge_dir = Path(knowledge_dir)
    self.engine: Any | None = None

    engine_importable = SEMANTIC_SEARCH_ENGINE_AVAILABLE and SearchEngineClass
    if not engine_importable:
        self._logger.warning(
            "SemanticSearchEngine not available, semantic encoding/search will be disabled."
        )
        return
    self.engine = SearchEngineClass(knowledge_dir=knowledge_dir)
3639
@property
def embeddings_dir(self) -> Path:
    """Return the directory holding embeddings.

    Prefers the ``embeddings_dir`` attribute of the underlying engine when an
    engine is present and exposes it; otherwise falls back to
    ``<knowledge_dir>/embeddings``.
    """
    # The attribute name must match exactly: a stray leading space in the
    # name makes hasattr() always fail and silently forces the fallback.
    if self.engine and hasattr(self.engine, "embeddings_dir"):
        return self.engine.embeddings_dir
    return Path(self.knowledge_dir) / "embeddings"
4346
def is_available(self) -> bool:
    """Check if the underlying semantic search engine is available.

    Returns ``False`` when ``uckn.core.SENTENCE_TRANSFORMERS_AVAILABLE``
    exists and is falsy, or when no engine was created; otherwise returns
    whatever the engine's own ``is_available()`` reports.
    """
    # Imported at call time so the flag is read dynamically on every call
    # (for test patching) rather than captured once at import.
    import uckn.core

    # A missing flag is treated as "no objection": only an explicit falsy
    # value disables availability.
    if not getattr(uckn.core, "SENTENCE_TRANSFORMERS_AVAILABLE", True):
        return False
    return self.engine is not None and self.engine.is_available()
5158
@@ -54,29 +61,47 @@ def encode(self, text: str) -> list[float] | None:
5461 Generate embeddings for text using the underlying sentence transformer model.
5562 """
5663 if not self .is_available ():
57- self ._logger .warning ("Semantic search engine not available, cannot encode text." )
64+ self ._logger .warning (
65+ "Semantic search engine not available, cannot encode text."
66+ )
5867 return None
5968 try :
6069 # The engine's generate_session_embedding expects a dict, but we just need encode
6170 # We can directly access the model if it's loaded.
62- if self .engine and hasattr (self .engine , 'sentence_model' ) and self .engine .sentence_model :
63- embedding = self .engine .sentence_model .encode (text , convert_to_numpy = True )
71+ if (
72+ self .engine
73+ and hasattr (self .engine , "sentence_model" )
74+ and self .engine .sentence_model
75+ ):
76+ embedding = self .engine .sentence_model .encode (
77+ text , convert_to_numpy = True
78+ )
6479 return embedding .tolist ()
6580 else :
66- self ._logger .error ("Sentence transformer model not loaded in SemanticSearchEngine." )
81+ self ._logger .error (
82+ "Sentence transformer model not loaded in SemanticSearchEngine."
83+ )
6784 return None
6885 except Exception as e :
6986 self ._logger .error (f"Failed to encode text: { e } " )
7087 return None
7188
72- def search (self , query : str , collection_name : str , limit : int = 10 , min_similarity : float = 0.7 ) -> list [dict [str , Any ]]:
89+ def search (
90+ self ,
91+ query : str ,
92+ collection_name : str ,
93+ limit : int = 10 ,
94+ min_similarity : float = 0.7 ,
95+ ) -> list [dict [str , Any ]]:
7396 """
7497 Perform semantic search using the underlying engine's capabilities.
7598 Note: This method is primarily for direct semantic search on raw text.
7699 For searching stored patterns, KnowledgeManager's search_patterns should be used.
77100 """
78101 if not self .is_available ():
79- self ._logger .warning ("Semantic search engine not available, cannot perform search." )
102+ self ._logger .warning (
103+ "Semantic search engine not available, cannot perform search."
104+ )
80105 return []
81106 try :
82107 # SemanticSearchEngine.search_similar_sessions is designed for session data.
@@ -85,7 +110,9 @@ def search(self, query: str, collection_name: str, limit: int = 10, min_similari
85110 # This method might be redundant if KnowledgeManager handles all searches.
86111 # For now, let's make it delegate to the engine's search if possible,
87112 # or indicate it's not the primary search interface.
88- self ._logger .info (f"Performing semantic search for query: '{ query } ' in collection '{ collection_name } '" )
113+ self ._logger .info (
114+ f"Performing semantic search for query: '{ query } ' in collection '{ collection_name } '"
115+ )
89116 # The SemanticSearchEngine's search_similar_sessions expects a query string
90117 # and searches its 'session_embeddings' collection.
91118 # To search other collections, we'd need direct ChromaDB access.
@@ -98,50 +125,69 @@ def search(self, query: str, collection_name: str, limit: int = 10, min_similari
98125 "SemanticSearch.search is a placeholder. "
99126 "Use KnowledgeManager.search_patterns for searching stored knowledge."
100127 )
101- return [] # This method is not directly used for stored patterns search by KM.
128+ return [] # This method is not directly used for stored patterns search by KM.
102129 except Exception as e :
103130 self ._logger .error (f"Semantic search failed: { e } " )
104131 return []
105132
def _extract_text_for_embedding(self, session_data: dict[str, Any]) -> str:
    """Extract meaningful text content from session data for embedding.

    Delegates to the engine's ``_extract_text_for_embedding``. When no engine
    is present, logs a warning and returns an empty string.
    """
    if self.engine:
        return self.engine._extract_text_for_embedding(session_data)
    self._logger.warning(
        "Semantic search engine not available, cannot extract text."
    )
    return ""
112141
def get_embedding_stats(self) -> dict[str, Any]:
    """Get statistics about stored embeddings.

    Delegates to the engine's ``get_embedding_stats``. Without an engine, a
    warning is logged and a placeholder dict is returned with zero
    embeddings, storage type ``"none"`` and ``model_available`` False.
    """
    if self.engine:
        return self.engine.get_embedding_stats()
    self._logger.warning(
        "Semantic search engine not available, cannot get stats."
    )
    return {
        "total_embeddings": 0,
        "storage_type": "none",
        "model_available": False,
    }
123154
def search_similar_sessions(
    self, query: str, max_results: int = 10, similarity_threshold: float = 0.7
) -> list[dict[str, Any]]:
    """Search for similar sessions using semantic similarity.

    Args:
        query: Query text forwarded to the engine.
        max_results: Forwarded to the engine as its second positional
            argument.
        similarity_threshold: Forwarded to the engine as its third
            positional argument.

    Returns:
        The engine's result, or an empty list (after logging a warning) when
        no engine is present.
    """
    if self.engine:
        return self.engine.search_similar_sessions(
            query, max_results, similarity_threshold
        )
    self._logger.warning(
        "Semantic search engine not available, cannot search sessions."
    )
    return []
131167
def store_session_embedding(
    self, session_id: str, session_data: dict[str, Any]
) -> bool:
    """Store session embedding in vector database.

    Delegates to the engine's ``store_session_embedding`` and returns its
    result. Without an engine, logs a warning and returns ``False``.
    """
    if self.engine:
        return self.engine.store_session_embedding(session_id, session_data)
    self._logger.warning(
        "Semantic search engine not available, cannot store embedding."
    )
    return False
138178
def _store_embedding_numpy(
    self, session_id: str, embedding: Any, session_data: dict[str, Any]
) -> None:
    """Store embedding using numpy fallback.

    Forwards to the engine's ``_store_embedding_numpy`` when the engine
    provides one. Logs a warning and does nothing when there is no engine or
    the engine lacks that method.
    """
    if not self.engine:
        self._logger.warning(
            "Semantic search engine not available, cannot store embedding."
        )
        return
    if not hasattr(self.engine, "_store_embedding_numpy"):
        self._logger.warning("Numpy storage not available in underlying engine.")
        return
    return self.engine._store_embedding_numpy(session_id, embedding, session_data)
0 commit comments