|
| 1 | +import logging |
| 2 | +from datetime import datetime |
| 3 | +from typing import Dict, List, Optional, Any |
| 4 | +import hashlib |
| 5 | +import json |
| 6 | + |
| 7 | +from ...storage import ChromaDBConnector |
| 8 | + |
| 9 | + |
class TechStackCompatibilityMatrix:
    """
    UCKN Tech Stack Compatibility Matrix Molecule.

    Stores pairwise compatibility records between technology stacks in the
    ``tech_stack_compatibility`` collection of a ChromaDB connector. Each
    record is keyed by a deterministic ID derived from the (unordered) pair
    of stacks, and carries its data in the document metadata:

    - ``tech_stack_a`` / ``tech_stack_b``: the two stacks, stored sorted.
    - ``score``: compatibility score in the range 0.0-1.0.
    - ``description``: free-text description of the compatibility.
    - ``created_at`` / ``updated_at``: ISO-8601 timestamps.
    - ``combo_id``: the record ID, duplicated into the metadata.

    All public methods degrade gracefully (returning ``None``, ``False`` or
    an empty list) when the connector reports that it is unavailable.
    """

    _COLLECTION_NAME = "tech_stack_compatibility"

    # Length of the placeholder embedding sent with every record. This class
    # never performs semantic search; the vector only satisfies the
    # connector's ``add_document`` signature.
    _EMBEDDING_DIM = 384

    def __init__(self, chroma_connector: "ChromaDBConnector"):
        """
        Initializes the TechStackCompatibilityMatrix with a ChromaDB connector.

        Logs an error (but does not raise) when the connector does not expose
        the ``tech_stack_compatibility`` collection.

        Args:
            chroma_connector: An instance of ChromaDBConnector.
        """
        self.chroma_connector = chroma_connector
        self._logger = logging.getLogger(__name__)

        # IMPORTANT: For this molecule to function correctly, the
        # ChromaDBConnector must include the schema for
        # "tech_stack_compatibility" in its `_COLLECTION_SCHEMAS` dictionary.
        if self._COLLECTION_NAME not in self.chroma_connector.collections:
            self._logger.error(
                "ChromaDB collection '%s' is not initialized "
                "or recognized by the provided ChromaDBConnector. "
                "Please ensure ChromaDBConnector's `_COLLECTION_SCHEMAS` "
                "includes the schema for '%s'.",
                self._COLLECTION_NAME,
                self._COLLECTION_NAME,
            )

    def is_available(self) -> bool:
        """
        Checks if the ChromaDB connector is available.

        Returns:
            Whatever the connector's ``is_available()`` reports.
        """
        return self.chroma_connector.is_available()

    @staticmethod
    def _generate_combo_id(ts_a: List[str], ts_b: List[str]) -> str:
        """
        Generates a consistent ID for a pair of technology stacks.

        Each stack is sorted, then the pair of stacks is sorted, so the ID is
        independent of the order of technologies inside a stack and of the
        order of the two arguments. The stack boundary is kept in the hashed
        value, so different pairings of the same technologies (for example
        ``(["a", "b"], ["c"])`` and ``(["a"], ["b", "c"])``) get different IDs.

        Args:
            ts_a: List of technologies in the first stack.
            ts_b: List of technologies in the second stack.

        Returns:
            The hex-encoded SHA-256 digest identifying the combination.
        """
        # Sorting the outer pair makes (A, B) and (B, A) hash identically.
        stack_pair = sorted([sorted(ts_a), sorted(ts_b)])

        # json.dumps gives a stable, unambiguous string for the nested lists.
        combo_string = json.dumps(stack_pair)
        return hashlib.sha256(combo_string.encode("utf-8")).hexdigest()

    def _fetch_metadata(
        self, ts_a: List[str], ts_b: List[str]
    ) -> Optional[Dict[str, Any]]:
        """
        Looks up the stored metadata for a pair of technology stacks.

        Args:
            ts_a: The first technology stack.
            ts_b: The second technology stack.

        Returns:
            The record's metadata, or None when the record does not exist or
            carries no "metadata" entry.
        """
        result = self.chroma_connector.get_document(
            collection_name=self._COLLECTION_NAME,
            doc_id=self._generate_combo_id(ts_a, ts_b),
        )
        if result and "metadata" in result:
            return result["metadata"]
        return None

    def add_tech_stack_combo(
        self,
        ts_a: List[str],
        ts_b: List[str],
        score: float,
        description: str = ""
    ) -> Optional[str]:
        """
        Adds a new technology stack compatibility combination to the matrix.

        Args:
            ts_a: The first technology stack (list of technologies).
            ts_b: The second technology stack (list of technologies).
            score: Compatibility score between 0.0 and 1.0.
            description: A description of the compatibility.

        Returns:
            The ID of the added combination if the connector reports success,
            None otherwise (connector unavailable, invalid score, or the
            connector's ``add_document`` returned a falsy value).
        """
        if not self.is_available():
            self._logger.error("ChromaDB not available, cannot add tech stack combo.")
            return None

        if not (0.0 <= score <= 1.0):
            self._logger.error(
                "Invalid score %s. Score must be between 0.0 and 1.0.", score
            )
            return None

        combo_id = self._generate_combo_id(ts_a, ts_b)
        # NOTE(review): naive local time; confirm whether UTC is expected.
        now_iso = datetime.now().isoformat()

        # NOTE(review): ChromaDB has historically restricted metadata values
        # to scalars; confirm the connector accepts list-valued entries.
        metadata = {
            "tech_stack_a": sorted(ts_a),  # Stored sorted for consistent comparison
            "tech_stack_b": sorted(ts_b),  # Stored sorted for consistent comparison
            "score": score,
            "description": description,
            "created_at": now_iso,
            "updated_at": now_iso,
            "combo_id": combo_id,
        }

        # The connector needs a document text; the real payload is the metadata.
        document_text = f"Compatibility between {', '.join(ts_a)} and {', '.join(ts_b)}"

        # add_document requires an embedding; a zero vector is a placeholder.
        dummy_embedding = [0.0] * self._EMBEDDING_DIM

        success = self.chroma_connector.add_document(
            collection_name=self._COLLECTION_NAME,
            doc_id=combo_id,
            document=document_text,
            embedding=dummy_embedding,
            metadata=metadata,
        )
        return combo_id if success else None

    def get_compatibility_score(self, ts_a: List[str], ts_b: List[str]) -> Optional[float]:
        """
        Retrieves the compatibility score between two technology stacks.

        Args:
            ts_a: The first technology stack.
            ts_b: The second technology stack.

        Returns:
            The stored score if the record exists and has one, None otherwise.
        """
        if not self.is_available():
            self._logger.warning("ChromaDB not available, cannot get compatibility score.")
            return None

        metadata = self._fetch_metadata(ts_a, ts_b)
        if metadata is None:
            return None
        return metadata.get("score")

    def get_compatibility_details(self, ts_a: List[str], ts_b: List[str]) -> Optional[Dict[str, Any]]:
        """
        Retrieves full details of a specific compatibility combination.

        Args:
            ts_a: The first technology stack.
            ts_b: The second technology stack.

        Returns:
            The stored metadata dictionary, or None if not found.
        """
        if not self.is_available():
            self._logger.warning("ChromaDB not available, cannot get compatibility details.")
            return None

        return self._fetch_metadata(ts_a, ts_b)

    def update_compatibility_score(
        self,
        ts_a: List[str],
        ts_b: List[str],
        new_score: Optional[float] = None,
        new_description: Optional[str] = None
    ) -> bool:
        """
        Updates an existing technology stack compatibility combination.

        Only the fields passed as non-None are changed; ``updated_at`` is
        always refreshed. The document text and embedding are left untouched.

        Args:
            ts_a: The first technology stack.
            ts_b: The second technology stack.
            new_score: The new compatibility score (0.0-1.0), optional.
            new_description: The new description, optional.

        Returns:
            The connector's ``update_document`` result on an attempted update;
            False when the connector is unavailable, the score is out of
            range, or the record is missing or has no metadata.
        """
        if not self.is_available():
            self._logger.warning("ChromaDB not available, cannot update compatibility score.")
            return False

        if new_score is not None and not (0.0 <= new_score <= 1.0):
            self._logger.error(
                "Invalid new score %s. Score must be between 0.0 and 1.0.", new_score
            )
            return False

        combo_id = self._generate_combo_id(ts_a, ts_b)

        # Retrieve existing metadata so the update merges rather than replaces.
        existing_doc = self.chroma_connector.get_document(
            collection_name=self._COLLECTION_NAME,
            doc_id=combo_id,
        )
        if not existing_doc:
            self._logger.warning(
                "Compatibility combo '%s' not found for update.", combo_id
            )
            return False

        existing_metadata = (
            existing_doc["metadata"] if "metadata" in existing_doc else None
        )
        if existing_metadata is None:
            # A record without metadata cannot be merged into; fail softly
            # instead of raising, matching the guard used by the getters.
            self._logger.warning(
                "Compatibility combo '%s' has no metadata, cannot update.", combo_id
            )
            return False

        updated_metadata = dict(existing_metadata)
        if new_score is not None:
            updated_metadata["score"] = new_score
        if new_description is not None:
            updated_metadata["description"] = new_description
        updated_metadata["updated_at"] = datetime.now().isoformat()

        # document/embedding are passed as None to signal "no change".
        return self.chroma_connector.update_document(
            collection_name=self._COLLECTION_NAME,
            doc_id=combo_id,
            document=None,
            embedding=None,
            metadata=updated_metadata,
        )

    def delete_tech_stack_combo(self, ts_a: List[str], ts_b: List[str]) -> bool:
        """
        Deletes a technology stack compatibility combination from the matrix.

        Args:
            ts_a: The first technology stack.
            ts_b: The second technology stack.

        Returns:
            The connector's ``delete_document`` result, or False when the
            connector is unavailable.
        """
        if not self.is_available():
            self._logger.warning("ChromaDB not available, cannot delete tech stack combo.")
            return False

        return self.chroma_connector.delete_document(
            collection_name=self._COLLECTION_NAME,
            doc_id=self._generate_combo_id(ts_a, ts_b),
        )

    def get_all_compatibility_scores(self) -> List[Dict[str, Any]]:
        """
        Retrieves all stored technology stack compatibility combinations.

        Returns:
            The metadata dictionary of every stored document that has one;
            an empty list when the connector is unavailable.
        """
        if not self.is_available():
            self._logger.warning("ChromaDB not available, cannot retrieve all compatibility scores.")
            return []

        all_docs = self.chroma_connector.get_all_documents(
            collection_name=self._COLLECTION_NAME
        )
        # Tolerate a connector that returns None instead of an empty list.
        return [doc["metadata"] for doc in (all_docs or []) if "metadata" in doc]

    def search_compatibility(
        self,
        query_tech_stack: List[str],
        limit: int = 10,
        min_score: float = 0.0
    ) -> List[Dict[str, Any]]:
        """
        Searches for compatibility records involving a given technology stack.

        A record matches when `query_tech_stack` equals (ignoring order)
        either its `tech_stack_a` or its `tech_stack_b`.

        Args:
            query_tech_stack: The technology stack to search for.
            limit: Maximum number of results to return; values <= 0 yield
                an empty list.
            min_score: Minimum compatibility score to include in results.

        Returns:
            Up to `limit` matching metadata dictionaries, in the order the
            connector returned them.
        """
        if not self.is_available():
            self._logger.warning("ChromaDB not available, cannot search compatibility.")
            return []

        if limit <= 0:
            return []

        # This method fetches every record and filters in Python rather than
        # issuing a metadata query, so cost grows with the collection size.
        self._logger.warning(
            "Performing in-memory filtering for tech stack compatibility search "
            "due to ChromaDBConnector's current API limitations for metadata-only queries. "
            "This may be inefficient for large datasets."
        )

        sorted_query = sorted(query_tech_stack)
        matches: List[Dict[str, Any]] = []
        for combo in self.get_all_compatibility_scores():
            # Re-sort stored stacks defensively; treat missing/None as empty.
            stack_a = sorted(combo.get("tech_stack_a") or [])
            stack_b = sorted(combo.get("tech_stack_b") or [])
            if sorted_query != stack_a and sorted_query != stack_b:
                continue

            score = combo.get("score")
            if score is None:
                score = 0.0
            if score < min_score:
                continue

            matches.append(combo)
            if len(matches) >= limit:
                break

        return matches
| 344 | + |
0 commit comments