forked from yoheinakajima/babyagi3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstore.py
More file actions
4270 lines (3797 loc) · 147 KB
/
store.py
File metadata and controls
4270 lines (3797 loc) · 147 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Memory store - database operations for the memory system.
Uses SQLite with sqlite-vec for vector search.
"""
import json
import os
import sqlite3
import struct
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from uuid import uuid4
import logging
logger = logging.getLogger(__name__)
from .models import (
AgentState,
Credential,
Edge,
Entity,
Event,
EventTopic,
ExtractionCall,
Fact,
Learning,
RetrievalQuery,
RetrievalResult,
SummaryNode,
Task,
ToolDefinition,
ToolRecord,
Topic,
)
# Vector dimensions
EMBEDDING_DIM = 1536
# ═══════════════════════════════════════════════════════════
# EVENT RETENTION POLICY
# ═══════════════════════════════════════════════════════════
@dataclass
class RetentionPolicy:
    """Configuration for event retention.

    Consumed by the cleanup job to decide which events may be deleted:
    events can expire by age, by total count, and a set of preservation
    rules exempts events that should never be removed.
    """

    # Maximum age for events (None = keep forever)
    max_age_days: int | None = 365
    # Maximum number of events (None = no limit)
    max_events: int | None = 100000
    # Keep important events longer
    important_event_types: list[str] | None = None  # e.g., ["task_completed", "observation"]
    important_multiplier: float = 3.0  # Keep important events 3x longer
    # Never delete events with these properties
    preserve_with_entities: bool = True  # Keep events linked to entities
    preserve_owner_events: bool = True  # Keep owner's events
    # Batch size for cleanup (rows processed per deletion pass)
    cleanup_batch_size: int = 1000
def serialize_embedding(embedding: list[float] | None) -> bytes | None:
"""Serialize embedding to bytes for SQLite storage."""
if embedding is None:
return None
return struct.pack(f"{len(embedding)}f", *embedding)
def deserialize_embedding(data: bytes | None) -> list[float] | None:
"""Deserialize embedding from bytes."""
if data is None:
return None
count = len(data) // 4
return list(struct.unpack(f"{count}f", data))
def serialize_json(obj: Any) -> str | None:
"""Serialize object to JSON string."""
if obj is None:
return None
return json.dumps(obj)
def deserialize_json(data: str | None) -> Any:
"""Deserialize JSON string to object."""
if data is None:
return None
return json.loads(data)
def generate_id() -> str:
    """Return a fresh random UUID4 in canonical 36-character string form."""
    return f"{uuid4()}"
def now_iso() -> str:
    """Render the current local time as an ISO-8601 string.

    NOTE(review): uses a naive local datetime; stored timestamps carry no
    timezone offset — confirm all writers/readers share the same locale.
    """
    current = datetime.now()
    return current.isoformat()
def parse_datetime(s: str | None) -> datetime | None:
"""Parse ISO datetime string."""
if s is None:
return None
return datetime.fromisoformat(s)
class MemoryStore:
"""
SQLite-based storage for the memory system.
"""
def __init__(self, store_path: str = "~/.babyagi/memory"):
self.store_path = Path(store_path).expanduser()
self.store_path.mkdir(parents=True, exist_ok=True)
self.db_path = self.store_path / "memory.db"
self._conn: sqlite3.Connection | None = None
self._vec_available = False
@property
def conn(self) -> sqlite3.Connection:
"""Get database connection (lazy initialization)."""
if self._conn is None:
self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
self._conn.row_factory = sqlite3.Row
# Try to load sqlite-vec extension
try:
self._conn.enable_load_extension(True)
# Try common locations for sqlite-vec
for ext_path in [
"vec0",
"/usr/local/lib/vec0",
"/usr/lib/vec0",
str(self.store_path / "vec0"),
]:
try:
self._conn.load_extension(ext_path)
self._vec_available = True
break
except sqlite3.OperationalError:
continue
except Exception as e:
logger.debug("SQLite extension loading not available: %s", e)
return self._conn
def initialize(self):
    """Initialize the database schema.

    Order matters: tables are created first, then column migrations run
    (older databases may lack newly added columns), then indices (some
    reference migrated columns), and finally the singleton root summary
    node and agent-state row are ensured.
    """
    self._create_tables()
    # Run migrations before indices — new indices may reference new columns
    self._migrate_tool_definitions()
    self._migrate_learnings()
    self._create_indices()
    self._ensure_root_node()
    self._ensure_agent_state()
def _migrate_tool_definitions(self):
"""Add new columns to tool_definitions for skills and composio support."""
cur = self.conn.cursor()
# Get existing columns
cur.execute("PRAGMA table_info(tool_definitions)")
existing_columns = {row["name"] for row in cur.fetchall()}
# Add new columns if they don't exist
migrations = [
("tool_type", "TEXT DEFAULT 'executable'"),
("skill_content", "TEXT"),
("composio_app", "TEXT"),
("composio_action", "TEXT"),
("depends_on", "TEXT"),
]
for col_name, col_def in migrations:
if col_name not in existing_columns:
try:
cur.execute(f"ALTER TABLE tool_definitions ADD COLUMN {col_name} {col_def}")
except sqlite3.OperationalError:
pass # Column might already exist
self.conn.commit()
def _migrate_learnings(self):
"""Add columns to learnings table for existing databases."""
cur = self.conn.cursor()
cur.execute("PRAGMA table_info(learnings)")
existing_columns = {row["name"] for row in cur.fetchall()}
migrations = {
"category": "TEXT NOT NULL DEFAULT 'general'",
"superseded_by": "TEXT", # ID of the learning that replaced this one
}
for col_name, col_def in migrations.items():
if col_name not in existing_columns:
try:
cur.execute(
f"ALTER TABLE learnings ADD COLUMN {col_name} {col_def}"
)
except sqlite3.OperationalError:
pass
self.conn.commit()
def _create_tables(self):
    """Create all tables.

    Every statement is ``CREATE TABLE IF NOT EXISTS``, so this is
    idempotent and safe to run on every startup. New columns for existing
    installations are added separately by the ``_migrate_*`` methods.
    """
    cur = self.conn.cursor()
    # Events — the append-only log everything else is extracted from.
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS events (
            id TEXT PRIMARY KEY,
            timestamp TEXT NOT NULL,
            channel TEXT,
            direction TEXT NOT NULL,
            event_type TEXT NOT NULL,
            task_id TEXT,
            tool_id TEXT,
            person_id TEXT,
            is_owner INTEGER NOT NULL DEFAULT 0,
            parent_event_id TEXT,
            conversation_id TEXT,
            content TEXT NOT NULL,
            content_embedding BLOB,
            metadata TEXT,
            extraction_status TEXT DEFAULT 'pending',
            extracted_at TEXT,
            created_at TEXT NOT NULL
        )
        """
    )
    # Entities — people/things referenced by events.
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS entities (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            type TEXT NOT NULL,
            type_raw TEXT NOT NULL,
            aliases TEXT,
            description TEXT,
            name_embedding BLOB,
            is_owner INTEGER DEFAULT 0,
            is_self INTEGER DEFAULT 0,
            event_count INTEGER DEFAULT 0,
            first_seen TEXT NOT NULL,
            last_seen TEXT NOT NULL,
            source_event_ids TEXT,
            summary_node_id TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL
        )
        """
    )
    # Edges — directed relations between two entities.
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS edges (
            id TEXT PRIMARY KEY,
            source_entity_id TEXT NOT NULL,
            target_entity_id TEXT NOT NULL,
            relation TEXT NOT NULL,
            relation_type TEXT,
            relation_embedding BLOB,
            is_current INTEGER DEFAULT 1,
            strength REAL DEFAULT 0.5,
            source_event_ids TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY (source_entity_id) REFERENCES entities(id),
            FOREIGN KEY (target_entity_id) REFERENCES entities(id)
        )
        """
    )
    # Topics — hierarchical subject labels (parent_topic_id self-reference).
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS topics (
            id TEXT PRIMARY KEY,
            label TEXT NOT NULL,
            description TEXT,
            keywords TEXT,
            embedding BLOB,
            parent_topic_id TEXT,
            event_count INTEGER DEFAULT 0,
            entity_count INTEGER DEFAULT 0,
            summary_node_id TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY (parent_topic_id) REFERENCES topics(id)
        )
        """
    )
    # Event-Topic junction (many-to-many with a relevance weight).
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS event_topics (
            event_id TEXT NOT NULL,
            topic_id TEXT NOT NULL,
            relevance REAL DEFAULT 1.0,
            created_at TEXT NOT NULL,
            PRIMARY KEY (event_id, topic_id),
            FOREIGN KEY (event_id) REFERENCES events(id),
            FOREIGN KEY (topic_id) REFERENCES topics(id)
        )
        """
    )
    # Tasks — units of work with lifecycle timestamps.
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS tasks (
            id TEXT PRIMARY KEY,
            title TEXT NOT NULL,
            description TEXT,
            type_raw TEXT,
            type_cluster TEXT,
            type_embedding BLOB,
            status TEXT DEFAULT 'pending',
            outcome TEXT,
            person_id TEXT,
            created_by_event_id TEXT,
            summary_node_id TEXT,
            created_at TEXT NOT NULL,
            started_at TEXT,
            completed_at TEXT,
            FOREIGN KEY (person_id) REFERENCES entities(id),
            FOREIGN KEY (created_by_event_id) REFERENCES events(id)
        )
        """
    )
    # Summary Nodes — the hierarchical summary tree (parent_id self-reference;
    # `key` is unique so singleton nodes like 'root' can be upserted).
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS summary_nodes (
            id TEXT PRIMARY KEY,
            node_type TEXT NOT NULL,
            key TEXT NOT NULL UNIQUE,
            label TEXT NOT NULL,
            parent_id TEXT,
            summary TEXT,
            summary_embedding BLOB,
            summary_updated_at TEXT,
            events_since_update INTEGER DEFAULT 0,
            event_count INTEGER DEFAULT 0,
            first_event_at TEXT,
            last_event_at TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY (parent_id) REFERENCES summary_nodes(id)
        )
        """
    )
    # Tools (legacy - kept for backward compatibility)
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS tools (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            description TEXT,
            description_embedding BLOB,
            usage_count INTEGER DEFAULT 0,
            last_used_at TEXT,
            summary_node_id TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL
        )
        """
    )
    # Tool Definitions - full tool persistence for self-improvement
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS tool_definitions (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL UNIQUE,
            description TEXT NOT NULL,
            -- Tool type: "executable", "skill", "composio"
            tool_type TEXT DEFAULT 'executable',
            -- Definition (what makes it executable)
            source_code TEXT,
            parameters TEXT,
            packages TEXT,
            env TEXT,
            tool_var_name TEXT,
            -- For skills
            skill_content TEXT,
            -- For composio tools
            composio_app TEXT,
            composio_action TEXT,
            -- Dependencies (JSON list of tool names this depends on)
            depends_on TEXT,
            -- Category
            category TEXT DEFAULT 'custom',
            -- State
            is_enabled INTEGER DEFAULT 1,
            is_dynamic INTEGER DEFAULT 1,
            -- Execution statistics
            usage_count INTEGER DEFAULT 0,
            success_count INTEGER DEFAULT 0,
            error_count INTEGER DEFAULT 0,
            last_used_at TEXT,
            last_error TEXT,
            last_error_at TEXT,
            avg_duration_ms REAL DEFAULT 0,
            total_duration_ms REAL DEFAULT 0,
            -- Graph integration
            entity_id TEXT,
            summary_node_id TEXT,
            -- Versioning
            version INTEGER DEFAULT 1,
            -- Provenance
            created_by_event_id TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY (entity_id) REFERENCES entities(id),
            FOREIGN KEY (summary_node_id) REFERENCES summary_nodes(id)
        )
        """
    )
    # Agent State — singleton row (see _ensure_agent_state).
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS agent_state (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            description TEXT,
            owner_entity_id TEXT,
            self_entity_id TEXT,
            current_topics TEXT,
            mood TEXT,
            focus TEXT,
            active_tasks TEXT,
            settings TEXT,
            state_updated_at TEXT,
            created_at TEXT NOT NULL,
            FOREIGN KEY (owner_entity_id) REFERENCES entities(id),
            FOREIGN KEY (self_entity_id) REFERENCES entities(id)
        )
        """
    )
    # Secure Credentials - for user accounts, credit cards, etc.
    # Note: *_ref columns hold references to secrets, not the secrets themselves.
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS credentials (
            id TEXT PRIMARY KEY,
            credential_type TEXT NOT NULL,
            service TEXT NOT NULL,
            -- For user accounts
            username TEXT,
            email TEXT,
            password_ref TEXT,
            -- For credit cards
            card_last_four TEXT,
            card_type TEXT,
            card_expiry TEXT,
            card_ref TEXT,
            billing_name TEXT,
            billing_address TEXT,
            -- Common fields
            notes TEXT,
            metadata TEXT,
            -- Timestamps
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            last_used_at TEXT
        )
        """
    )
    # Metrics tables — per-call cost/latency accounting.
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS llm_calls (
            id TEXT PRIMARY KEY,
            timestamp TEXT NOT NULL,
            source TEXT NOT NULL,
            model TEXT NOT NULL,
            thread_id TEXT,
            input_tokens INTEGER NOT NULL,
            output_tokens INTEGER NOT NULL,
            cost_usd REAL NOT NULL,
            duration_ms INTEGER NOT NULL,
            stop_reason TEXT
        )
        """
    )
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS embedding_calls (
            id TEXT PRIMARY KEY,
            timestamp TEXT NOT NULL,
            provider TEXT NOT NULL,
            model TEXT NOT NULL,
            text_count INTEGER NOT NULL,
            token_estimate INTEGER NOT NULL,
            cost_usd REAL NOT NULL,
            duration_ms INTEGER NOT NULL,
            cached INTEGER NOT NULL DEFAULT 0
        )
        """
    )
    # Learnings table - for self-improvement system
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS learnings (
            id TEXT PRIMARY KEY,
            -- Source
            source_type TEXT NOT NULL,
            source_event_id TEXT,
            -- Content
            content TEXT NOT NULL,
            content_embedding BLOB,
            -- Classification
            sentiment TEXT NOT NULL DEFAULT 'neutral',
            confidence REAL DEFAULT 0.5,
            category TEXT NOT NULL DEFAULT 'general',
            -- Associations
            tool_id TEXT,
            topic_ids TEXT,
            objective_type TEXT,
            entity_ids TEXT,
            -- Actionable insight
            applies_when TEXT,
            recommendation TEXT,
            -- Stats
            times_applied INTEGER DEFAULT 0,
            last_applied_at TEXT,
            -- Timestamps
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY (source_event_id) REFERENCES events(id)
        )
        """
    )
    # Facts table - unified triplet storage for all sources
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS facts (
            id TEXT PRIMARY KEY,
            -- Triplet Core
            subject_entity_id TEXT NOT NULL,
            predicate TEXT NOT NULL,
            object_entity_id TEXT,
            object_value TEXT,
            object_type TEXT NOT NULL DEFAULT 'value',
            -- Additional entities mentioned
            mentioned_entity_ids TEXT,
            -- Classification
            fact_type TEXT DEFAULT 'relation',
            predicate_type TEXT,
            -- Human-readable (LLM-generated)
            fact_text TEXT NOT NULL,
            fact_embedding BLOB,
            -- Provenance
            source_type TEXT NOT NULL DEFAULT 'conversation',
            source_id TEXT,
            source_event_ids TEXT,
            -- Confidence & Strength
            confidence REAL DEFAULT 0.8,
            strength REAL DEFAULT 0.5,
            -- Temporality
            valid_from TEXT,
            valid_to TEXT,
            is_current INTEGER DEFAULT 1,
            -- Usage tracking
            times_retrieved INTEGER DEFAULT 0,
            times_used INTEGER DEFAULT 0,
            last_used_at TEXT,
            -- Timestamps
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            FOREIGN KEY (subject_entity_id) REFERENCES entities(id),
            FOREIGN KEY (object_entity_id) REFERENCES entities(id)
        )
        """
    )
    # Retrieval queries - for learning optimal retrieval strategies
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS retrieval_queries (
            id TEXT PRIMARY KEY,
            -- The query
            query_text TEXT NOT NULL,
            query_embedding BLOB,
            query_type TEXT,
            -- Strategy used
            chain_strategy TEXT,
            -- Outcome
            total_results INTEGER DEFAULT 0,
            results_used INTEGER DEFAULT 0,
            was_successful INTEGER DEFAULT 0,
            -- Performance
            total_time_ms INTEGER DEFAULT 0,
            -- Context
            objective_id TEXT,
            event_id TEXT,
            created_at TEXT NOT NULL
        )
        """
    )
    # Retrieval results - individual results from queries
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS retrieval_results (
            id TEXT PRIMARY KEY,
            query_id TEXT NOT NULL,
            -- What was found
            result_type TEXT NOT NULL,
            result_id TEXT NOT NULL,
            -- How it was found
            retrieval_method TEXT NOT NULL,
            method_step INTEGER DEFAULT 1,
            -- Relevance
            similarity_score REAL,
            rank_position INTEGER DEFAULT 0,
            -- Usage
            was_used INTEGER DEFAULT 0,
            created_at TEXT NOT NULL,
            FOREIGN KEY (query_id) REFERENCES retrieval_queries(id)
        )
        """
    )
    # Extraction calls - metrics for extraction LLM calls
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS extraction_calls (
            id TEXT PRIMARY KEY,
            -- Source
            source_type TEXT NOT NULL,
            source_id TEXT,
            content_length INTEGER DEFAULT 0,
            -- Results
            entities_extracted INTEGER DEFAULT 0,
            facts_extracted INTEGER DEFAULT 0,
            topics_extracted INTEGER DEFAULT 0,
            -- Cost tracking
            model TEXT,
            input_tokens INTEGER DEFAULT 0,
            output_tokens INTEGER DEFAULT 0,
            cost_usd REAL DEFAULT 0.0,
            duration_ms INTEGER DEFAULT 0,
            -- Timing decision
            timing_mode TEXT DEFAULT 'immediate',
            created_at TEXT NOT NULL
        )
        """
    )
    # Composio auth configs - stores auth_config_id per toolkit
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS composio_auth_configs (
            id TEXT PRIMARY KEY,
            toolkit TEXT NOT NULL UNIQUE,
            auth_config_id TEXT NOT NULL,
            auth_type TEXT NOT NULL DEFAULT 'managed',
            scopes TEXT,
            metadata TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL
        )
        """
    )
    self.conn.commit()
def _create_indices(self):
"""Create database indices."""
cur = self.conn.cursor()
indices = [
"CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_events_channel ON events(channel, timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_events_person ON events(person_id, timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_events_task ON events(task_id, timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_events_tool ON events(tool_id, timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_events_extraction ON events(extraction_status)",
"CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name)",
"CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(type)",
"CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_entity_id)",
"CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_entity_id)",
"CREATE INDEX IF NOT EXISTS idx_edges_relation_type ON edges(relation_type)",
"CREATE INDEX IF NOT EXISTS idx_topics_label ON topics(label)",
"CREATE INDEX IF NOT EXISTS idx_topics_parent ON topics(parent_topic_id)",
"CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status)",
"CREATE INDEX IF NOT EXISTS idx_tasks_type_cluster ON tasks(type_cluster)",
"CREATE INDEX IF NOT EXISTS idx_summary_nodes_key ON summary_nodes(key)",
"CREATE INDEX IF NOT EXISTS idx_summary_nodes_type ON summary_nodes(node_type)",
"CREATE INDEX IF NOT EXISTS idx_summary_nodes_parent ON summary_nodes(parent_id)",
"CREATE INDEX IF NOT EXISTS idx_summary_nodes_stale ON summary_nodes(events_since_update DESC)",
# Tool definitions indices
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_name ON tool_definitions(name)",
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_enabled ON tool_definitions(is_enabled)",
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_dynamic ON tool_definitions(is_dynamic)",
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_category ON tool_definitions(category)",
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_error_count ON tool_definitions(error_count DESC)",
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_tool_type ON tool_definitions(tool_type)",
"CREATE INDEX IF NOT EXISTS idx_tool_definitions_composio_app ON tool_definitions(composio_app)",
# Credentials indices
"CREATE INDEX IF NOT EXISTS idx_credentials_service ON credentials(service)",
"CREATE INDEX IF NOT EXISTS idx_credentials_type ON credentials(credential_type)",
# Metrics indices
"CREATE INDEX IF NOT EXISTS idx_llm_calls_timestamp ON llm_calls(timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_llm_calls_source ON llm_calls(source)",
"CREATE INDEX IF NOT EXISTS idx_llm_calls_model ON llm_calls(model)",
"CREATE INDEX IF NOT EXISTS idx_llm_calls_thread ON llm_calls(thread_id)",
"CREATE INDEX IF NOT EXISTS idx_embedding_calls_timestamp ON embedding_calls(timestamp DESC)",
"CREATE INDEX IF NOT EXISTS idx_embedding_calls_model ON embedding_calls(model)",
# Learnings indices
"CREATE INDEX IF NOT EXISTS idx_learnings_tool ON learnings(tool_id)",
"CREATE INDEX IF NOT EXISTS idx_learnings_objective_type ON learnings(objective_type)",
"CREATE INDEX IF NOT EXISTS idx_learnings_sentiment ON learnings(sentiment)",
"CREATE INDEX IF NOT EXISTS idx_learnings_source_type ON learnings(source_type)",
"CREATE INDEX IF NOT EXISTS idx_learnings_category ON learnings(category)",
"CREATE INDEX IF NOT EXISTS idx_learnings_created_at ON learnings(created_at DESC)",
# Facts indices
"CREATE INDEX IF NOT EXISTS idx_facts_subject ON facts(subject_entity_id)",
"CREATE INDEX IF NOT EXISTS idx_facts_object ON facts(object_entity_id)",
"CREATE INDEX IF NOT EXISTS idx_facts_predicate_type ON facts(predicate_type)",
"CREATE INDEX IF NOT EXISTS idx_facts_fact_type ON facts(fact_type)",
"CREATE INDEX IF NOT EXISTS idx_facts_source ON facts(source_type, source_id)",
"CREATE INDEX IF NOT EXISTS idx_facts_temporal ON facts(valid_from, valid_to)",
"CREATE INDEX IF NOT EXISTS idx_facts_current ON facts(is_current)",
"CREATE INDEX IF NOT EXISTS idx_facts_usage ON facts(times_used DESC)",
"CREATE INDEX IF NOT EXISTS idx_facts_strength ON facts(strength DESC)",
# Retrieval tracking indices
"CREATE INDEX IF NOT EXISTS idx_retrieval_queries_type ON retrieval_queries(query_type)",
"CREATE INDEX IF NOT EXISTS idx_retrieval_queries_success ON retrieval_queries(was_successful)",
"CREATE INDEX IF NOT EXISTS idx_retrieval_queries_created ON retrieval_queries(created_at DESC)",
"CREATE INDEX IF NOT EXISTS idx_retrieval_results_query ON retrieval_results(query_id)",
"CREATE INDEX IF NOT EXISTS idx_retrieval_results_method ON retrieval_results(retrieval_method, was_used)",
# Extraction calls indices
"CREATE INDEX IF NOT EXISTS idx_extraction_calls_source ON extraction_calls(source_type)",
"CREATE INDEX IF NOT EXISTS idx_extraction_calls_created ON extraction_calls(created_at DESC)",
]
for idx in indices:
cur.execute(idx)
self.conn.commit()
def _ensure_root_node(self):
    """Insert the singleton 'root' summary node if it does not exist yet."""
    cur = self.conn.cursor()
    cur.execute("SELECT id FROM summary_nodes WHERE key = 'root'")
    if cur.fetchone() is not None:
        return  # Already bootstrapped.
    now = now_iso()
    cur.execute(
        """
        INSERT INTO summary_nodes (id, node_type, key, label, summary, created_at, updated_at)
        VALUES (?, 'root', 'root', 'Knowledge', 'No information yet.', ?, ?)
        """,
        (generate_id(), now, now),
    )
    self.conn.commit()
def _ensure_agent_state(self):
    """Insert the singleton agent_state row if none exists yet."""
    cur = self.conn.cursor()
    cur.execute("SELECT id FROM agent_state LIMIT 1")
    if cur.fetchone() is not None:
        return  # Already bootstrapped.
    now = now_iso()
    cur.execute(
        """
        INSERT INTO agent_state (id, name, settings, created_at)
        VALUES (?, 'Agent', '{}', ?)
        """,
        (generate_id(), now),
    )
    self.conn.commit()
# ═══════════════════════════════════════════════════════════
# EVENTS
# ═══════════════════════════════════════════════════════════
def create_event(
    self,
    content: str,
    event_type: str = "message",
    channel: str | None = None,
    direction: str = "internal",
    task_id: str | None = None,
    tool_id: str | None = None,
    person_id: str | None = None,
    is_owner: bool = False,
    parent_event_id: str | None = None,
    conversation_id: str | None = None,
    metadata: dict | None = None,
    content_embedding: list[float] | None = None,
) -> Event:
    """Insert a new event row (extraction_status='pending') and return it.

    Also bumps staleness counters on the summary nodes this event touches.
    """
    new_id = generate_id()
    stamp = now_iso()
    row = (
        new_id,
        stamp,
        channel,
        direction,
        event_type,
        task_id,
        tool_id,
        person_id,
        1 if is_owner else 0,
        parent_event_id,
        conversation_id,
        content,
        serialize_embedding(content_embedding),
        serialize_json(metadata),
        stamp,
    )
    cur = self.conn.cursor()
    cur.execute(
        """
        INSERT INTO events (
            id, timestamp, channel, direction, event_type, task_id, tool_id,
            person_id, is_owner, parent_event_id, conversation_id, content,
            content_embedding, metadata, extraction_status, created_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?)
        """,
        row,
    )
    self.conn.commit()
    # Increment staleness on relevant summary nodes
    self._increment_staleness_for_event(channel, tool_id, person_id, task_id)
    return Event(
        id=new_id,
        timestamp=parse_datetime(stamp),
        channel=channel,
        direction=direction,
        event_type=event_type,
        task_id=task_id,
        tool_id=tool_id,
        person_id=person_id,
        is_owner=is_owner,
        parent_event_id=parent_event_id,
        conversation_id=conversation_id,
        content=content,
        content_embedding=content_embedding,
        metadata=metadata,
        extraction_status="pending",
        created_at=parse_datetime(stamp),
    )
def get_event(self, event_id: str) -> Event | None:
"""Get an event by ID."""
cur = self.conn.cursor()
cur.execute("SELECT * FROM events WHERE id = ?", (event_id,))
row = cur.fetchone()
if row is None:
return None
return self._row_to_event(row)
def get_recent_events(
self,
limit: int = 10,
channel: str | None = None,
person_id: str | None = None,
task_id: str | None = None,
tool_id: str | None = None,
) -> list[Event]:
"""Get recent events with optional filters."""
cur = self.conn.cursor()
conditions = []
params = []
if channel:
conditions.append("channel = ?")
params.append(channel)
if person_id:
conditions.append("person_id = ?")
params.append(person_id)
if task_id:
conditions.append("task_id = ?")
params.append(task_id)
if tool_id:
conditions.append("tool_id = ?")
params.append(tool_id)
where_clause = " AND ".join(conditions) if conditions else "1=1"
params.append(limit)
cur.execute(
f"SELECT * FROM events WHERE {where_clause} ORDER BY timestamp DESC LIMIT ?",
params,
)
return [self._row_to_event(row) for row in cur.fetchall()]
def get_events_for_entity(self, entity_id: str, limit: int = 50) -> list[Event]:
"""Get events related to an entity."""
return self.get_recent_events(limit=limit, person_id=entity_id)
def get_pending_extraction_events(self, limit: int = 100) -> list[Event]:
"""Get events pending extraction."""
cur = self.conn.cursor()
cur.execute(
"SELECT * FROM events WHERE extraction_status = 'pending' ORDER BY timestamp ASC LIMIT ?",
(limit,),
)
return [self._row_to_event(row) for row in cur.fetchall()]
def update_event_extraction_status(
self, event_id: str, status: str, extracted_at: datetime | None = None
):
"""Update the extraction status of an event."""
cur = self.conn.cursor()
cur.execute(
"UPDATE events SET extraction_status = ?, extracted_at = ? WHERE id = ?",
(status, extracted_at.isoformat() if extracted_at else None, event_id),
)
self.conn.commit()
def increment_extraction_retry(self, event_id: str) -> int:
"""
Increment the retry count for an event extraction.
Returns the new retry count.
"""
cur = self.conn.cursor()
# Get current metadata
cur.execute("SELECT metadata FROM events WHERE id = ?", (event_id,))
row = cur.fetchone()