Skip to content

Commit 2c11cf1

Browse files
PatStiles, uri-99, Oppen
authored
fix(audit): Remove Task data from aggregator after a response has been responded or expires. (#1004)
Co-authored-by: Uriel Mihura <[email protected]> Co-authored-by: Mario Rugiero <[email protected]>
1 parent b10fc6f commit 2c11cf1

File tree

6 files changed

+572
-441
lines changed

6 files changed

+572
-441
lines changed

aggregator/cmd/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ func aggregatorMain(ctx *cli.Context) error {
4949
return err
5050
}
5151

52+
// Supervisor revives garbage collector
53+
go func() {
54+
for {
55+
log.Println("Starting Garbage collector")
56+
aggregator.ClearTasksFromMaps()
57+
log.Println("Garbage collector panicked, Supervisor restarting")
58+
}
59+
}()
60+
5261
// Listen for new task created in the ServiceManager contract in a separate goroutine, both V1 and V2 subscriptions:
5362
go func() {
5463
listenErr := aggregator.SubscribeToNewTasks()

aggregator/internal/pkg/aggregator.go

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ type Aggregator struct {
6060
// and can start from zero
6161
batchesIdxByIdentifierHash map[[32]byte]uint32
6262

63-
// Stores the taskCreatedBlock for each batch bt batch index
63+
// Stores the taskCreatedBlock for each batch by batch index
6464
batchCreatedBlockByIdx map[uint32]uint64
6565

6666
// Stores the TaskResponse for each batch by batchIdentifierHash
@@ -215,6 +215,8 @@ func (agg *Aggregator) Start(ctx context.Context) error {
215215

216216
const MaxSentTxRetries = 5
217217

218+
// BLS_AGG_SERVICE_TIMEOUT is the duration passed as the last argument to
// blsAggregationService.InitializeNewTask when AddNewTask registers a batch.
// NOTE(review): presumably this is how long the BLS aggregation service waits
// for operator signatures before a task expires — confirm against the service.
const BLS_AGG_SERVICE_TIMEOUT = 100 * time.Second
219+
218220
func (agg *Aggregator) handleBlsAggServiceResponse(blsAggServiceResp blsagg.BlsAggregationServiceResponse) {
219221
agg.taskMutex.Lock()
220222
agg.AggregatorConfig.BaseConfig.Logger.Info("- Locked Resources: Fetching task data")
@@ -275,6 +277,7 @@ func (agg *Aggregator) handleBlsAggServiceResponse(blsAggServiceResp blsagg.BlsA
275277
agg.logger.Info("Aggregator successfully responded to task",
276278
"taskIndex", blsAggServiceResp.TaskIndex,
277279
"batchIdentifierHash", "0x"+hex.EncodeToString(batchIdentifierHash[:]))
280+
278281
return
279282
}
280283

@@ -361,12 +364,17 @@ func (agg *Aggregator) AddNewTask(batchMerkleRoot [32]byte, senderAddress [20]by
361364
BatchMerkleRoot: batchMerkleRoot,
362365
SenderAddress: senderAddress,
363366
}
367+
agg.logger.Info(
368+
"Task Info added in aggregator:",
369+
"Task", batchIndex,
370+
"batchIdentifierHash", batchIdentifierHash,
371+
)
364372
agg.nextBatchIndex += 1
365373

366374
quorumNums := eigentypes.QuorumNums{eigentypes.QuorumNum(QUORUM_NUMBER)}
367375
quorumThresholdPercentages := eigentypes.QuorumThresholdPercentages{eigentypes.QuorumThresholdPercentage(QUORUM_THRESHOLD)}
368376

369-
err := agg.blsAggregationService.InitializeNewTask(batchIndex, taskCreatedBlock, quorumNums, quorumThresholdPercentages, 100*time.Second)
377+
err := agg.blsAggregationService.InitializeNewTask(batchIndex, taskCreatedBlock, quorumNums, quorumThresholdPercentages, BLS_AGG_SERVICE_TIMEOUT)
370378
// FIXME(marian): When this errors, should we retry initializing new task? Logging fatal for now.
371379
if err != nil {
372380
agg.logger.Fatalf("BLS aggregation service error when initializing new task: %s", err)
@@ -377,3 +385,51 @@ func (agg *Aggregator) AddNewTask(batchMerkleRoot [32]byte, senderAddress [20]by
377385
agg.AggregatorConfig.BaseConfig.Logger.Info("- Unlocked Resources: Adding new task")
378386
agg.logger.Info("New task added", "batchIndex", batchIndex, "batchIdentifierHash", "0x"+hex.EncodeToString(batchIdentifierHash[:]))
379387
}
388+
389+
// Long-lived goroutine that periodically checks and removes old Tasks from stored Maps
390+
// It runs every GarbageCollectorPeriod and removes all tasks older than GarbageCollectorTasksAge
391+
// This was added because each task occupies memory in the maps, and we need to free it to avoid a memory leak
392+
func (agg *Aggregator) ClearTasksFromMaps() {
393+
defer func() {
394+
err := recover() //stops panics
395+
if err != nil {
396+
agg.logger.Error("Recovered from panic", "err", err)
397+
}
398+
}()
399+
400+
agg.AggregatorConfig.BaseConfig.Logger.Info(fmt.Sprintf("- Removing finalized Task Infos from Maps every %v", agg.AggregatorConfig.Aggregator.GarbageCollectorPeriod))
401+
lastIdxDeleted := uint32(0)
402+
403+
for {
404+
time.Sleep(agg.AggregatorConfig.Aggregator.GarbageCollectorPeriod)
405+
406+
agg.AggregatorConfig.BaseConfig.Logger.Info("Cleaning finalized tasks from maps")
407+
oldTaskIdHash, err := agg.avsReader.GetOldTaskHash(agg.AggregatorConfig.Aggregator.GarbageCollectorTasksAge, agg.AggregatorConfig.Aggregator.GarbageCollectorTasksInterval)
408+
if err != nil {
409+
agg.logger.Error("Error getting old task hash, skipping this garbage collect", "err", err)
410+
continue // Retry in the next iteration
411+
}
412+
if oldTaskIdHash == nil {
413+
agg.logger.Warn("No old tasks found")
414+
continue // Retry in the next iteration
415+
}
416+
417+
taskIdxToDelete := agg.batchesIdxByIdentifierHash[*oldTaskIdHash]
418+
agg.logger.Info("Old task found", "taskIndex", taskIdxToDelete)
419+
// delete from lastIdxDeleted to taskIdxToDelete
420+
for i := lastIdxDeleted + 1; i <= taskIdxToDelete; i++ {
421+
batchIdentifierHash, exists := agg.batchesIdentifierHashByIdx[i]
422+
if exists {
423+
agg.logger.Info("Cleaning up finalized task", "taskIndex", i)
424+
delete(agg.batchesIdxByIdentifierHash, batchIdentifierHash)
425+
delete(agg.batchCreatedBlockByIdx, i)
426+
delete(agg.batchesIdentifierHashByIdx, i)
427+
delete(agg.batchDataByIdentifierHash, batchIdentifierHash)
428+
} else {
429+
agg.logger.Warn("Task not found in maps", "taskIndex", i)
430+
}
431+
}
432+
lastIdxDeleted = taskIdxToDelete
433+
agg.AggregatorConfig.BaseConfig.Logger.Info("Done cleaning finalized tasks from maps")
434+
}
435+
}

0 commit comments

Comments
 (0)