|
3 | 3 | Created Date: Saturday October 25th 2025 |
4 | 4 | Author: Christian Nonis <[email protected]> |
5 | 5 | ----- |
6 | | -Last Modified: Saturday October 25th 2025 11:55:47 am |
| 6 | +Last Modified: Saturday December 13th 2025 |
7 | 7 | Modified By: the developer formerly known as Christian Nonis at <[email protected]> |
8 | 8 | ----- |
9 | 9 | """ |
@@ -168,5 +168,117 @@ def save_kg_changes(self, kg_changes: KGChanges, brain_id: str) -> KGChanges: |
168 | 168 | collection.insert_one(kg_changes.model_dump(mode="json")) |
169 | 169 | return kg_changes |
170 | 170 |
|
def get_structured_data_by_id(
    self, id: str, brain_id: str
) -> "StructuredData | None":
    """Fetch a single structured-data record by its ``id`` field.

    Args:
        id: Value compared against the document's ``id`` field.
        brain_id: Passed as ``database`` when resolving the
            ``structured_data`` collection.

    Returns:
        A ``StructuredData`` built from the stored document, or ``None``
        when no document matches.

    Raises:
        KeyError: If the matched document lacks ``id``, ``data`` or
            ``types``.
    """
    collection = self.get_collection("structured_data", database=brain_id)
    document = collection.find_one({"id": id})
    if not document:
        # A miss is reported as None rather than raised, hence the
        # optional return annotation.
        return None
    return StructuredData(
        id=document["id"],
        data=document["data"],
        types=document["types"],
        # Optional fields: a missing key is forwarded as None.
        metadata=document.get("metadata"),
        inserted_at=document.get("inserted_at"),
    )
| 185 | + |
def get_structured_data_list(
    self,
    brain_id: str,
    limit: int = 10,
    skip: int = 0,
    types: "list[str] | None" = None,
    query_text: "str | None" = None,
) -> "list[StructuredData]":
    """Return one page of structured-data records, optionally filtered.

    Args:
        brain_id: Passed as ``database`` when resolving the
            ``structured_data`` collection.
        limit: Maximum number of records to return.
        skip: Number of matching records to skip before the page starts.
        types: When non-empty, keep only documents whose ``types`` field
            matches any of these values (``$in``).
        query_text: When non-empty, keep only documents where ``data``,
            ``types`` or ``metadata`` contains this text,
            case-insensitively. The text is matched literally.

    Returns:
        ``StructuredData`` objects for the requested page, ordered by
        ``_id`` ascending.

    Raises:
        KeyError: If a matched document lacks ``id``, ``data`` or ``types``.
    """
    import re  # local import: the file's import block is not visible here

    collection = self.get_collection("structured_data", database=brain_id)
    query = {}
    if types:
        query["types"] = {"$in": types}

    if query_text:
        # Escape the text so regex metacharacters typed by a caller are
        # matched literally and cannot produce an invalid or pathological
        # pattern on the server.
        literal = re.escape(query_text)
        # NOTE(review): $regex only matches string values. If ``data`` or
        # ``metadata`` are stored as embedded documents, those clauses
        # cannot match -- TODO confirm against the stored schema.
        query["$or"] = [
            {field: {"$regex": literal, "$options": "i"}}
            for field in ("data", "types", "metadata")
        ]

    # Without an explicit sort the server may return documents in any
    # order, so consecutive skip/limit pages could overlap or miss rows.
    cursor = collection.find(query).sort("_id", 1).skip(skip).limit(limit)
    return [
        StructuredData(
            id=document["id"],
            data=document["data"],
            types=document["types"],
            # Optional fields: a missing key is forwarded as None.
            metadata=document.get("metadata"),
            inserted_at=document.get("inserted_at"),
        )
        for document in cursor
    ]
| 212 | + |
def get_structured_data_types(self, brain_id: str) -> list[str]:
    """List the distinct ``types`` values stored on structured data.

    The aggregation unwinds each document's ``types`` field, groups on the
    unwound value to deduplicate it, and sorts the groups ascending.

    Args:
        brain_id: Passed as ``database`` when resolving the
            ``structured_data`` collection.

    Returns:
        The distinct values, in the order the aggregation yields them.
    """
    unwind_stage = {"$unwind": "$types"}
    dedupe_stage = {"$group": {"_id": "$types"}}
    order_stage = {"$sort": {"_id": 1}}

    structured = self.get_collection("structured_data", database=brain_id)
    distinct_types = []
    for row in structured.aggregate([unwind_stage, dedupe_stage, order_stage]):
        # Each group document carries the deduplicated value under "_id".
        distinct_types.append(row["_id"])
    return distinct_types
| 222 | + |
def get_observation_by_id(
    self, id: str, brain_id: str
) -> "Observation | None":
    """Fetch a single observation by its ``id`` field.

    Args:
        id: Value compared against the document's ``id`` field.
        brain_id: Passed as ``database`` when resolving the
            ``observations`` collection.

    Returns:
        An ``Observation`` built from the stored document, or ``None``
        when no document matches.

    Raises:
        KeyError: If the matched document lacks ``id``, ``text`` or
            ``resource_id``.
    """
    collection = self.get_collection("observations", database=brain_id)
    document = collection.find_one({"id": id})
    if not document:
        # A miss is reported as None rather than raised, hence the
        # optional return annotation.
        return None
    return Observation(
        id=document["id"],
        text=document["text"],
        # Optional fields: a missing key is forwarded as None.
        metadata=document.get("metadata"),
        resource_id=document["resource_id"],
        inserted_at=document.get("inserted_at"),
    )
| 237 | + |
def get_observations_list(
    self,
    brain_id: str,
    limit: int = 10,
    skip: int = 0,
    resource_id: "str | None" = None,
    labels: "list[str] | None" = None,
    query_text: "str | None" = None,
) -> "list[Observation]":
    """Return one page of observations, optionally filtered.

    Args:
        brain_id: Passed as ``database`` when resolving the
            ``observations`` collection.
        limit: Maximum number of observations to return.
        skip: Number of matching observations to skip before the page
            starts.
        resource_id: When non-empty, keep only documents whose
            ``resource_id`` equals this value.
        labels: When non-empty, keep only documents whose
            ``metadata.labels`` matches any of these values (``$in``).
        query_text: When non-empty, keep only documents where ``text``,
            ``resource_id`` or ``metadata`` contains this text,
            case-insensitively. The text is matched literally.

    Returns:
        ``Observation`` objects for the requested page, ordered by ``_id``
        ascending.

    Raises:
        KeyError: If a matched document lacks ``id``, ``text`` or
            ``resource_id``.
    """
    import re  # local import: the file's import block is not visible here

    collection = self.get_collection("observations", database=brain_id)
    query = {}
    if resource_id:
        query["resource_id"] = resource_id
    if labels:
        query["metadata.labels"] = {"$in": labels}

    if query_text:
        # Escape the text so regex metacharacters typed by a caller are
        # matched literally and cannot produce an invalid or pathological
        # pattern on the server.
        literal = re.escape(query_text)
        # NOTE(review): the labels filter above addresses
        # ``metadata.labels``, so ``metadata`` is presumably an embedded
        # document. $regex only matches string values, which would make
        # the ``metadata`` clause a no-op -- confirm and target concrete
        # sub-fields if so.
        query["$or"] = [
            {field: {"$regex": literal, "$options": "i"}}
            for field in ("text", "resource_id", "metadata")
        ]

    # Without an explicit sort the server may return documents in any
    # order, so consecutive skip/limit pages could overlap or miss rows.
    cursor = collection.find(query).sort("_id", 1).skip(skip).limit(limit)
    return [
        Observation(
            id=document["id"],
            text=document["text"],
            # Optional fields: a missing key is forwarded as None.
            metadata=document.get("metadata"),
            resource_id=document["resource_id"],
            inserted_at=document.get("inserted_at"),
        )
        for document in cursor
    ]
| 272 | + |
def get_observation_labels(self, brain_id: str) -> list[str]:
    """List the distinct ``metadata.labels`` values stored on observations.

    The aggregation keeps documents that have a ``metadata.labels`` field,
    unwinds that field, groups on the unwound value to deduplicate it, and
    sorts the groups ascending.

    Args:
        brain_id: Passed as ``database`` when resolving the
            ``observations`` collection.

    Returns:
        The distinct values, in the order the aggregation yields them.
    """
    label_path = "metadata.labels"
    label_ref = "$metadata.labels"

    stages = []
    stages.append({"$match": {label_path: {"$exists": True}}})
    stages.append({"$unwind": label_ref})
    stages.append({"$group": {"_id": label_ref}})
    stages.append({"$sort": {"_id": 1}})

    observations = self.get_collection("observations", database=brain_id)
    distinct_labels = []
    for row in observations.aggregate(stages):
        # Each group document carries the deduplicated value under "_id".
        distinct_labels.append(row["_id"])
    return distinct_labels
171 | 283 |
|
# Client instance constructed as soon as this statement executes; the leading
# underscore marks the name as non-public by Python convention.
# NOTE(review): this view does not show whether MongoClient is the class whose
# methods appear above or an imported driver class -- confirm before relying on it.
172 | 284 | _mongo_client = MongoClient()
0 commit comments