Skip to content

Commit 83e280c

Browse files
authored
feat: improve population of sequence log (#2752)
1 parent 8358354 commit 83e280c

28 files changed

+1045
-187
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
lowered from 384 to 256 estimated tokens to leave sufficient headroom for
1212
content where the word-based heuristic undercounts.
1313

14+
### Added
15+
- Sequence log backfill now includes agent entities and agent links alongside
16+
journal entries and entry links, closing sync gaps for agent data.
17+
1418
## [0.9.903] - 2026-03-06
1519
### Fixed
1620
- AI task agent no longer repeatedly proposes checklist items that already exist

flatpak/com.matthiasn.lotti.metainfo.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
<release version="0.9.904" date="2026-03-06">
3535
<description>
3636
<p>Fixed embedding backfill failing for entries with code, URLs, or long unpunctuated text by lowering the chunk target to 256 tokens for safer headroom against the model's 512-token context window.</p>
37+
<p>Sequence log backfill now includes agent entities and agent links alongside journal entries and entry links, closing sync gaps for agent data.</p>
3738
</description>
3839
</release>
3940
<release version="0.9.903" date="2026-03-06">

lib/features/agents/database/agent_database.dart

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import 'dart:convert';
12
import 'dart:io';
23

34
import 'package:drift/drift.dart';
@@ -69,4 +70,96 @@ class AgentDatabase extends _$AgentDatabase {
6970
},
7071
);
7172
}
73+
74+
/// Stream agent entities with their vector clocks for populating the
75+
/// sequence log. Yields batches of records with entity ID and vector
76+
/// clock map. Uses lightweight SQL + JSON extraction to avoid full
77+
/// deserialization.
78+
Stream<List<({String id, Map<String, int>? vectorClock})>>
79+
streamAgentEntitiesWithVectorClock({int batchSize = 1000}) async* {
80+
var offset = 0;
81+
82+
while (true) {
83+
final rows = await customSelect(
84+
'SELECT id, serialized FROM agent_entities '
85+
'ORDER BY id LIMIT ? OFFSET ?',
86+
variables: [Variable(batchSize), Variable(offset)],
87+
).get();
88+
89+
if (rows.isEmpty) break;
90+
91+
yield rows
92+
.map(
93+
(row) => (
94+
id: row.read<String>('id'),
95+
vectorClock: _extractVectorClock(row.read<String>('serialized')),
96+
),
97+
)
98+
.toList();
99+
offset += batchSize;
100+
}
101+
}
102+
103+
/// Stream agent links with their vector clocks for populating the
104+
/// sequence log. Yields batches of records with link ID and vector
105+
/// clock map.
106+
Stream<List<({String id, Map<String, int>? vectorClock})>>
107+
streamAgentLinksWithVectorClock({int batchSize = 1000}) async* {
108+
var offset = 0;
109+
110+
while (true) {
111+
final rows = await customSelect(
112+
'SELECT id, serialized FROM agent_links '
113+
'ORDER BY id LIMIT ? OFFSET ?',
114+
variables: [Variable(batchSize), Variable(offset)],
115+
).get();
116+
117+
if (rows.isEmpty) break;
118+
119+
yield rows
120+
.map(
121+
(row) => (
122+
id: row.read<String>('id'),
123+
vectorClock: _extractVectorClock(row.read<String>('serialized')),
124+
),
125+
)
126+
.toList();
127+
offset += batchSize;
128+
}
129+
}
130+
131+
  /// Counts all rows in `agent_entities`.
  ///
  /// Used as the denominator for progress reporting while backfilling the
  /// sequence log.
  Future<int> countAllAgentEntities() => _countAll('agent_entities');
133+
134+
  /// Counts all rows in `agent_links`.
  ///
  /// Used as the denominator for progress reporting while backfilling the
  /// sequence log.
  Future<int> countAllAgentLinks() => _countAll('agent_links');
136+
137+
Future<int> _countAll(String tableName) async {
138+
final result = await customSelect(
139+
'SELECT COUNT(*) AS cnt FROM $tableName',
140+
).getSingle();
141+
return result.read<int>('cnt');
142+
}
143+
144+
/// Lightweight extraction of vector clock from serialized JSON.
145+
/// Agent entities and links store vectorClock at JSON root level.
146+
/// Returns null for any malformed data rather than throwing.
147+
static Map<String, int>? _extractVectorClock(String serialized) {
148+
try {
149+
final decoded = jsonDecode(serialized);
150+
if (decoded is! Map<String, dynamic>) return null;
151+
152+
final vc = decoded['vectorClock'];
153+
if (vc is! Map<String, dynamic>) return null;
154+
155+
final result = <String, int>{};
156+
for (final entry in vc.entries) {
157+
if (entry.value is! num) return null;
158+
result[entry.key] = (entry.value as num).toInt();
159+
}
160+
return result;
161+
} on Object catch (_) {
162+
return null;
163+
}
164+
}
72165
}

lib/features/agents/state/agent_providers.dart

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,10 @@ DomainLogger domainLogger(Ref ref) {
110110
return logger;
111111
}
112112

113-
/// The agent database instance (singleton via GetIt).
///
/// Resolved from the GetIt service locator so the app-wide singleton
/// registered at startup is shared. The provider does not own the database
/// lifecycle, so it deliberately registers no `onDispose` close handler.
// Idiom: single-expression body uses an arrow per Effective Dart.
@Riverpod(keepAlive: true)
AgentDatabase agentDatabase(Ref ref) => getIt<AgentDatabase>();
120118

121119
/// The agent repository wrapping the database.

lib/features/sync/sequence/sync_sequence_log_service.dart

Lines changed: 71 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -729,150 +729,102 @@ class SyncSequenceLogService {
729729
}
730730

731731
/// Populate the sequence log from existing journal entries.
732-
/// This is used to backfill the sequence log for entries that were
733-
/// created before the sequence log feature was added.
734-
///
735-
/// This method streams journal entries in batches and records their vector
736-
/// clocks in the sequence log. Records entries for ALL hosts in each entry's
737-
/// vector clock so any device with the entry can respond to backfill requests.
738-
///
739-
/// [onProgress] is called with progress from 0.0 to 1.0 as entries are
740-
/// processed.
741-
///
742-
/// Returns the number of entries populated.
732+
/// Returns the number of sequence log entries populated.
743733
Future<int> populateFromJournal({
744734
required Stream<List<({String id, Map<String, int>? vectorClock})>>
745735
entryStream,
746736
required Future<int> Function() getTotalCount,
747737
void Function(double progress)? onProgress,
748-
}) async {
749-
final total = await getTotalCount();
750-
var processed = 0;
751-
var populated = 0;
752-
final now = DateTime.now();
753-
754-
// Cache of existing (hostId, counter) pairs to avoid duplicates
755-
// We'll populate this lazily per-host as we encounter them
756-
final existingByHost = <String, Set<int>>{};
757-
758-
await for (final batch in entryStream) {
759-
final toInsert = <SyncSequenceLogCompanion>[];
760-
761-
for (final entry in batch) {
762-
processed++;
763-
764-
final vc = entry.vectorClock;
765-
if (vc == null || vc.isEmpty) continue;
766-
767-
// Find the originating host (the one with the highest counter,
768-
// which is typically the creator of this specific entry version)
769-
String? originatingHost;
770-
var maxCounter = 0;
771-
for (final e in vc.entries) {
772-
if (e.value > maxCounter) {
773-
maxCounter = e.value;
774-
originatingHost = e.key;
775-
}
776-
}
777-
778-
// Record entry for each host in the vector clock
779-
for (final vcEntry in vc.entries) {
780-
final hostId = vcEntry.key;
781-
final counter = vcEntry.value;
782-
783-
// Lazily load existing counters for this host
784-
if (!existingByHost.containsKey(hostId)) {
785-
existingByHost[hostId] =
786-
await _syncDatabase.getCountersForHost(hostId);
787-
}
788-
789-
final existing = existingByHost[hostId]!;
790-
791-
// Skip if already exists
792-
if (existing.contains(counter)) continue;
793-
794-
// Mark as existing to avoid duplicates within this run
795-
existing.add(counter);
796-
797-
toInsert.add(
798-
SyncSequenceLogCompanion(
799-
hostId: Value(hostId),
800-
counter: Value(counter),
801-
entryId: Value(entry.id),
802-
originatingHostId: Value(originatingHost ?? hostId),
803-
status: Value(SyncSequenceStatus.received.index),
804-
createdAt: Value(now),
805-
updatedAt: Value(now),
806-
),
807-
);
808-
}
809-
}
810-
811-
// Batch insert
812-
if (toInsert.isNotEmpty) {
813-
await _syncDatabase.batchInsertSequenceEntries(toInsert);
814-
populated += toInsert.length;
815-
}
816-
817-
// Report progress after each batch
818-
if (onProgress != null && total > 0) {
819-
onProgress(processed / total);
820-
}
821-
}
822-
823-
if (populated > 0) {
824-
_loggingService.captureEvent(
825-
'populateFromJournal: added $populated sequence log entries',
826-
domain: 'SYNC_SEQUENCE',
827-
subDomain: 'populate',
828-
);
829-
}
830-
831-
return populated;
738+
}) {
739+
return _populateFromStream(
740+
dataStream: entryStream,
741+
getTotalCount: getTotalCount,
742+
onProgress: onProgress,
743+
payloadType: SyncSequencePayloadType.journalEntity,
744+
label: 'populateFromJournal',
745+
);
832746
}
833747

834748
/// Populate the sequence log from existing entry links.
835-
/// This is used to backfill the sequence log for entry links that were
836-
/// created before the sequence log feature was added, or to resolve
837-
/// "ghost missing" counters that correspond to EntryLink operations.
838-
///
839-
/// This method streams entry links in batches and records their vector
840-
/// clocks in the sequence log with [SyncSequencePayloadType.entryLink].
841-
/// Records entries for ALL hosts in each link's vector clock so any device
842-
/// with the link can respond to backfill requests.
843-
///
844-
/// [onProgress] is called with progress from 0.0 to 1.0 as links are
845-
/// processed.
846-
///
847-
/// Returns the number of entries populated.
749+
/// Returns the number of sequence log entries populated.
848750
Future<int> populateFromEntryLinks({
849751
required Stream<List<({String id, Map<String, int>? vectorClock})>>
850752
linkStream,
851753
required Future<int> Function() getTotalCount,
852754
void Function(double progress)? onProgress,
755+
}) {
756+
return _populateFromStream(
757+
dataStream: linkStream,
758+
getTotalCount: getTotalCount,
759+
onProgress: onProgress,
760+
payloadType: SyncSequencePayloadType.entryLink,
761+
label: 'populateFromEntryLinks',
762+
);
763+
}
764+
765+
/// Populate the sequence log from existing agent entities.
766+
/// Returns the number of sequence log entries populated.
767+
Future<int> populateFromAgentEntities({
768+
required Stream<List<({String id, Map<String, int>? vectorClock})>>
769+
entityStream,
770+
required Future<int> Function() getTotalCount,
771+
void Function(double progress)? onProgress,
772+
}) {
773+
return _populateFromStream(
774+
dataStream: entityStream,
775+
getTotalCount: getTotalCount,
776+
onProgress: onProgress,
777+
payloadType: SyncSequencePayloadType.agentEntity,
778+
label: 'populateFromAgentEntities',
779+
);
780+
}
781+
782+
/// Populate the sequence log from existing agent links.
783+
/// Returns the number of sequence log entries populated.
784+
Future<int> populateFromAgentLinks({
785+
required Stream<List<({String id, Map<String, int>? vectorClock})>>
786+
linkStream,
787+
required Future<int> Function() getTotalCount,
788+
void Function(double progress)? onProgress,
789+
}) {
790+
return _populateFromStream(
791+
dataStream: linkStream,
792+
getTotalCount: getTotalCount,
793+
onProgress: onProgress,
794+
payloadType: SyncSequencePayloadType.agentLink,
795+
label: 'populateFromAgentLinks',
796+
);
797+
}
798+
799+
/// Shared implementation for populating the sequence log from a paginated
800+
/// stream of records with vector clocks. Used by all four populate methods.
801+
Future<int> _populateFromStream({
802+
required Stream<List<({String id, Map<String, int>? vectorClock})>>
803+
dataStream,
804+
required Future<int> Function() getTotalCount,
805+
required SyncSequencePayloadType payloadType,
806+
required String label,
807+
void Function(double progress)? onProgress,
853808
}) async {
854809
final total = await getTotalCount();
855810
var processed = 0;
856811
var populated = 0;
857812
final now = DateTime.now();
858813

859814
// Cache of existing (hostId, counter) pairs to avoid duplicates
860-
// We'll populate this lazily per-host as we encounter them
861815
final existingByHost = <String, Set<int>>{};
862816

863-
await for (final batch in linkStream) {
817+
await for (final batch in dataStream) {
864818
final toInsert = <SyncSequenceLogCompanion>[];
865819

866-
for (final link in batch) {
820+
for (final record in batch) {
867821
processed++;
868822

869-
final vc = link.vectorClock;
823+
final vc = record.vectorClock;
870824
if (vc == null || vc.isEmpty) continue;
871825

872-
// Find the originating host (the one with the highest counter,
873-
// which is typically the creator of this specific link version).
874-
// Sort entries by host ID first to ensure deterministic tie-breaking
875-
// when multiple hosts have the same max counter.
826+
// Find the originating host (the one with the highest counter).
827+
// Sort entries by host ID for deterministic tie-breaking.
876828
String? originatingHost;
877829
var maxCounter = -1;
878830
final sortedEntries = vc.entries.toList()
@@ -907,8 +859,8 @@ class SyncSequenceLogService {
907859
SyncSequenceLogCompanion(
908860
hostId: Value(hostId),
909861
counter: Value(counter),
910-
entryId: Value(link.id),
911-
payloadType: Value(SyncSequencePayloadType.entryLink.index),
862+
entryId: Value(record.id),
863+
payloadType: Value(payloadType.index),
912864
originatingHostId: Value(originatingHost ?? hostId),
913865
status: Value(SyncSequenceStatus.received.index),
914866
createdAt: Value(now),
@@ -932,7 +884,7 @@ class SyncSequenceLogService {
932884

933885
if (populated > 0) {
934886
_loggingService.captureEvent(
935-
'populateFromEntryLinks: added $populated sequence log entries',
887+
'$label: added $populated sequence log entries',
936888
domain: 'SYNC_SEQUENCE',
937889
subDomain: 'populate',
938890
);

0 commit comments

Comments
 (0)