Skip to content

Commit 86bb230

Browse files
author
Julian Ventura
committed
Merge branch 'staging' into fix/aggregator-recover-lost-batches
2 parents 9d56887 + 635c8f0 commit 86bb230

File tree

62 files changed

+5629
-536
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+5629
-536
lines changed

Makefile

Lines changed: 123 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ anvil_start_with_block_time:
132132
@echo "Starting Anvil..."
133133
anvil --load-state contracts/scripts/anvil/state/alignedlayer-deployed-anvil-state.json --block-time 7
134134

135+
anvil_start_with_block_time_with_more_prefunded_accounts:
136+
@echo "Starting Anvil..."
137+
anvil --load-state contracts/scripts/anvil/state/alignedlayer-deployed-anvil-state.json --block-time 7 -a 2000
138+
135139
_AGGREGATOR_:
136140

137141
aggregator_start:
@@ -247,11 +251,15 @@ operator_deposit_into_mock_strategy:
247251
--strategy-address $(STRATEGY_ADDRESS) \
248252
--amount 100000000000000000
249253

254+
255+
AMOUNT ?= 1000
256+
250257
operator_deposit_into_strategy:
251258
@echo "Depositing into strategy"
252259
@go run operator/cmd/main.go deposit-into-strategy \
253260
--config $(CONFIG_FILE) \
254-
--amount 1000
261+
--strategy-address $(STRATEGY_ADDRESS) \
262+
--amount $(AMOUNT)
255263

256264
operator_register_with_aligned_layer:
257265
@echo "Registering operator with AlignedLayer"
@@ -280,7 +288,7 @@ verifier_disable:
280288

281289
__BATCHER__:
282290

283-
BURST_SIZE=5
291+
BURST_SIZE ?= 5
284292

285293
user_fund_payment_service:
286294
@. ./scripts/user_fund_payment_service_devnet.sh
@@ -297,6 +305,11 @@ batcher_start_local: user_fund_payment_service
297305
@$(MAKE) run_storage &
298306
@cargo run --manifest-path ./batcher/aligned-batcher/Cargo.toml --release -- --config ./config-files/config-batcher.yaml --env-file ./batcher/aligned-batcher/.env.dev
299307

308+
batcher_start_local_no_fund:
309+
@echo "Starting Batcher..."
310+
@$(MAKE) run_storage &
311+
@cargo run --manifest-path ./batcher/aligned-batcher/Cargo.toml --release -- --config ./config-files/config-batcher.yaml --env-file ./batcher/aligned-batcher/.env.dev
312+
300313
install_batcher:
301314
@cargo install --path batcher/aligned-batcher
302315

@@ -397,7 +410,7 @@ batcher_send_plonk_bn254_burst: batcher/target/release/aligned
397410
--vk ../../scripts/test_files/gnark_plonk_bn254_script/plonk.vk \
398411
--proof_generator_addr 0x66f9664f97F2b50F62D13eA064982f936dE76657 \
399412
--rpc_url $(RPC_URL) \
400-
--repetitions 4 \
413+
--repetitions $(BURST_SIZE) \
401414
--network $(NETWORK)
402415

403416
batcher_send_plonk_bls12_381_task: batcher/target/release/aligned
@@ -444,6 +457,82 @@ batcher_send_burst_groth16: batcher/target/release/aligned
444457
@mkdir -p scripts/test_files/gnark_groth16_bn254_infinite_script/infinite_proofs
445458
@./batcher/aligned/send_burst_tasks.sh $(BURST_SIZE) $(START_COUNTER)
446459

460+
461+
__TASK_SENDER__:
462+
BURST_TIME_SECS ?= 3
463+
464+
task_sender_generate_groth16_proofs:
465+
@cd batcher/aligned-task-sender && \
466+
cargo run --release -- generate-proofs \
467+
--number-of-proofs $(NUMBER_OF_PROOFS) --proof-type groth16 \
468+
--dir-to-save-proofs $(CURDIR)/scripts/test_files/task_sender/proofs
469+
470+
# ===== DEVNET =====
471+
task_sender_fund_wallets_devnet:
472+
@cd batcher/aligned-task-sender && \
473+
cargo run --release -- generate-and-fund-wallets \
474+
--eth-rpc-url http://localhost:8545 \
475+
--network devnet \
476+
--amount-to-deposit 1 \
477+
--amount-to-deposit-to-aligned 0.9999 \
478+
--private-keys-filepath $(CURDIR)/batcher/aligned-task-sender/wallets/devnet
479+
480+
task_sender_send_infinite_proofs_devnet:
481+
@cd batcher/aligned-task-sender && \
482+
cargo run --release -- send-infinite-proofs \
483+
--burst-size $(BURST_SIZE) --burst-time-secs $(BURST_TIME_SECS) \
484+
--eth-rpc-url http://localhost:8545 \
485+
--batcher-url ws://localhost:8080 \
486+
--network devnet \
487+
--proofs-dirpath $(CURDIR)/scripts/test_files/task_sender/proofs \
488+
--private-keys-filepath $(CURDIR)/batcher/aligned-task-sender/wallets/devnet
489+
490+
task_sender_test_connections_devnet:
491+
@cd batcher/aligned-task-sender && \
492+
cargo run --release -- test-connections \
493+
--batcher-url ws://localhost:8080 \
494+
--num-senders $(NUM_SENDERS)
495+
496+
# ===== HOLESKY-STAGE =====
497+
task_sender_generate_and_fund_wallets_holesky_stage:
498+
@cd batcher/aligned-task-sender && \
499+
cargo run --release -- generate-and-fund-wallets \
500+
--eth-rpc-url https://ethereum-holesky-rpc.publicnode.com \
501+
--network holesky-stage \
502+
--funding-wallet-private-key $(FUNDING_WALLET_PRIVATE_KEY) \
503+
--number-wallets $(NUM_WALLETS) \
504+
--amount-to-deposit $(AMOUNT_TO_DEPOSIT) \
505+
--amount-to-deposit-to-aligned $(AMOUNT_TO_DEPOSIT_TO_ALIGNED) \
506+
--private-keys-filepath $(CURDIR)/batcher/aligned-task-sender/wallets/holesky-stage
507+
508+
task_sender_send_infinite_proofs_holesky_stage:
509+
@cd batcher/aligned-task-sender && \
510+
cargo run --release -- send-infinite-proofs \
511+
--burst-size $(BURST_SIZE) --burst-time-secs $(BURST_TIME_SECS) \
512+
--eth-rpc-url https://ethereum-holesky-rpc.publicnode.com \
513+
--batcher-url wss://stage.batcher.alignedlayer.com \
514+
--network holesky-stage \
515+
--proofs-dirpath $(CURDIR)/scripts/test_files/task_sender/proofs \
516+
--private-keys-filepath $(CURDIR)/batcher/aligned-task-sender/wallets/holesky-stage
517+
518+
task_sender_test_connections_holesky_stage:
519+
@cd batcher/aligned-task-sender && \
520+
cargo run --release -- test-connections \
521+
--batcher-url wss://stage.batcher.alignedlayer.com \
522+
--num-senders $(NUM_SENDERS)
523+
524+
__UTILS__:
525+
aligned_get_user_balance_devnet:
526+
@cd batcher/aligned/ && cargo run --release -- get-user-balance \
527+
--user_addr $(USER_ADDR)
528+
529+
aligned_get_user_balance_holesky:
530+
@cd batcher/aligned/ && cargo run --release -- get-user-balance \
531+
--rpc_url https://ethereum-holesky-rpc.publicnode.com \
532+
--network holesky \
533+
--user_addr $(USER_ADDR)
534+
535+
447536
__GENERATE_PROOFS__:
448537
# TODO add a default proving system
449538

@@ -1087,3 +1176,34 @@ ansible_batcher_deploy: ## Deploy the Batcher. Parameters: INVENTORY, KEYSTORE
10871176
@ansible-playbook infra/ansible/playbooks/batcher.yaml \
10881177
-i $(INVENTORY) \
10891178
-e "keystore_path=$(KEYSTORE)"
1179+
1180+
ansible_aggregator_create_env: ## Create empty variables files for the Aggregator deploy
1181+
@cp -n infra/ansible/playbooks/ini/config-aggregator.ini.example infra/ansible/playbooks/ini/config-aggregator.ini
1182+
@echo "Config files for the Aggregator created in infra/ansible/playbooks/ini"
1183+
@echo "Please complete the values and run make ansible_aggregator_deploy"
1184+
1185+
ansible_aggregator_deploy: ## Deploy the Aggregator. Parameters: INVENTORY, ECDSA_KEYSTORE, BLS_KEYSTORE
1186+
@if [ -z "$(INVENTORY)" ] || [ -z "$(ECDSA_KEYSTORE)" ] || [ -z "$(BLS_KEYSTORE)" ]; then \
1187+
echo "Error: INVENTORY, ECDSA_KEYSTORE, BLS_KEYSTORE must be set."; \
1188+
exit 1; \
1189+
fi
1190+
@ansible-playbook infra/ansible/playbooks/aggregator.yaml \
1191+
-i $(INVENTORY) \
1192+
-e "ecdsa_keystore_path=$(ECDSA_KEYSTORE)" \
1193+
-e "bls_keystore_path=$(BLS_KEYSTORE)"
1194+
1195+
ansible_operator_create_env: ## Create empty variables files for the Operator deploy
1196+
@cp -n infra/ansible/playbooks/ini/config-operator.ini.example infra/ansible/playbooks/ini/config-operator.ini
1197+
@cp -n infra/ansible/playbooks/ini/config-register-operator.ini.example infra/ansible/playbooks/ini/config-register-operator.ini
1198+
@echo "Config files for the Operator created in infra/ansible/playbooks/ini"
1199+
@echo "Please complete the values and run make ansible_operator_deploy"
1200+
1201+
ansible_operator_deploy: ## Deploy the Operator. Parameters: INVENTORY, ECDSA_KEYSTORE, BLS_KEYSTORE
1202+
@if [ -z "$(INVENTORY)" ] || [ -z "$(ECDSA_KEYSTORE)" ] || [ -z "$(BLS_KEYSTORE)" ]; then \
1203+
echo "Error: INVENTORY, ECDSA_KEYSTORE, BLS_KEYSTORE must be set."; \
1204+
exit 1; \
1205+
fi
1206+
@ansible-playbook infra/ansible/playbooks/operator.yaml \
1207+
-i $(INVENTORY) \
1208+
-e "ecdsa_keystore_path=$(ECDSA_KEYSTORE)" \
1209+
-e "bls_keystore_path=$(BLS_KEYSTORE)"

aggregator/pkg/aggregator.go

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,12 @@ type Aggregator struct {
7474
// Note: In case of a reboot it can start from 0 again
7575
nextBatchIndex uint32
7676

77-
// Mutex to protect batchesIdentifierHashByIdx, batchesIdxByIdentifierHash and nextBatchIndex
77+
// Mutex to protect:
78+
// - batchesIdentifierHashByIdx
79+
// - batchesIdxByIdentifierHash
80+
// - batchCreatedBlockByIdx
81+
// - batchDataByIdentifierHash
82+
// - nextBatchIndex
7883
taskMutex *sync.Mutex
7984

8085
// Mutex to protect ethereum wallet
@@ -93,6 +98,15 @@ type Aggregator struct {
9398
func NewAggregator(aggregatorConfig config.AggregatorConfig) (*Aggregator, error) {
9499
newBatchChan := make(chan *servicemanager.ContractAlignedLayerServiceManagerNewBatchV3)
95100

101+
logger := aggregatorConfig.BaseConfig.Logger
102+
103+
// Metrics
104+
reg := prometheus.NewRegistry()
105+
aggregatorMetrics := metrics.NewMetrics(aggregatorConfig.Aggregator.MetricsIpPortAddress, reg, logger)
106+
107+
// Telemetry
108+
aggregatorTelemetry := NewTelemetry(aggregatorConfig.Aggregator.TelemetryIpPortAddress, logger)
109+
96110
avsReader, err := chainio.NewAvsReaderFromConfig(aggregatorConfig.BaseConfig, aggregatorConfig.EcdsaConfig)
97111
if err != nil {
98112
return nil, err
@@ -103,7 +117,7 @@ func NewAggregator(aggregatorConfig config.AggregatorConfig) (*Aggregator, error
103117
return nil, err
104118
}
105119

106-
avsWriter, err := chainio.NewAvsWriterFromConfig(aggregatorConfig.BaseConfig, aggregatorConfig.EcdsaConfig)
120+
avsWriter, err := chainio.NewAvsWriterFromConfig(aggregatorConfig.BaseConfig, aggregatorConfig.EcdsaConfig, aggregatorMetrics)
107121
if err != nil {
108122
return nil, err
109123
}
@@ -124,7 +138,6 @@ func NewAggregator(aggregatorConfig config.AggregatorConfig) (*Aggregator, error
124138

125139
aggregatorPrivateKey := aggregatorConfig.EcdsaConfig.PrivateKey
126140

127-
logger := aggregatorConfig.BaseConfig.Logger
128141
clients, err := sdkclients.BuildAll(chainioConfig, aggregatorPrivateKey, logger)
129142
if err != nil {
130143
logger.Errorf("Cannot create sdk clients", "err", err)
@@ -150,13 +163,6 @@ func NewAggregator(aggregatorConfig config.AggregatorConfig) (*Aggregator, error
150163
avsRegistryService := avsregistry.NewAvsRegistryServiceChainCaller(avsReader.ChainReader, operatorPubkeysService, logger)
151164
blsAggregationService := blsagg.NewBlsAggregatorService(avsRegistryService, hashFunction, logger)
152165

153-
// Metrics
154-
reg := prometheus.NewRegistry()
155-
aggregatorMetrics := metrics.NewMetrics(aggregatorConfig.Aggregator.MetricsIpPortAddress, reg, logger)
156-
157-
// Telemetry
158-
aggregatorTelemetry := NewTelemetry(aggregatorConfig.Aggregator.TelemetryIpPortAddress, logger)
159-
160166
nextBatchIndex := uint32(0)
161167

162168
aggregator := Aggregator{
@@ -219,6 +225,13 @@ func (agg *Aggregator) Start(ctx context.Context) error {
219225
const MaxSentTxRetries = 5
220226

221227
func (agg *Aggregator) handleBlsAggServiceResponse(blsAggServiceResp blsagg.BlsAggregationServiceResponse) {
228+
defer func() {
229+
err := recover() //stops panics
230+
if err != nil {
231+
agg.logger.Error("handleBlsAggServiceResponse recovered from panic", "err", err)
232+
}
233+
}()
234+
222235
agg.taskMutex.Lock()
223236
agg.AggregatorConfig.BaseConfig.Logger.Info("- Locked Resources: Fetching task data")
224237
batchIdentifierHash := agg.batchesIdentifierHashByIdx[blsAggServiceResp.TaskIndex]
@@ -271,10 +284,15 @@ func (agg *Aggregator) handleBlsAggServiceResponse(blsAggServiceResp blsagg.BlsA
271284
}
272285

273286
agg.logger.Info("Sending aggregated response onchain", "taskIndex", blsAggServiceResp.TaskIndex,
274-
"batchIdentifierHash", "0x"+hex.EncodeToString(batchIdentifierHash[:]))
287+
"batchIdentifierHash", "0x"+hex.EncodeToString(batchIdentifierHash[:]), "merkleRoot", "0x"+hex.EncodeToString(batchData.BatchMerkleRoot[:]))
275288
receipt, err := agg.sendAggregatedResponse(batchIdentifierHash, batchData.BatchMerkleRoot, batchData.SenderAddress, nonSignerStakesAndSignature)
276289
if err == nil {
277-
agg.telemetry.TaskSentToEthereum(batchData.BatchMerkleRoot, receipt.TxHash.String())
290+
// In some cases, we may fail to retrieve the receipt for the transaction.
291+
txHash := "Unknown"
292+
if receipt != nil {
293+
txHash = receipt.TxHash.String()
294+
}
295+
agg.telemetry.TaskSentToEthereum(batchData.BatchMerkleRoot, txHash)
278296
agg.logger.Info("Aggregator successfully responded to task",
279297
"taskIndex", blsAggServiceResp.TaskIndex,
280298
"batchIdentifierHash", "0x"+hex.EncodeToString(batchIdentifierHash[:]))
@@ -378,8 +396,7 @@ func (agg *Aggregator) AddNewTask(batchMerkleRoot [32]byte, senderAddress [20]by
378396
quorumNums := eigentypes.QuorumNums{eigentypes.QuorumNum(QUORUM_NUMBER)}
379397
quorumThresholdPercentages := eigentypes.QuorumThresholdPercentages{eigentypes.QuorumThresholdPercentage(QUORUM_THRESHOLD)}
380398

381-
err := agg.blsAggregationService.InitializeNewTask(batchIndex, taskCreatedBlock, quorumNums, quorumThresholdPercentages, agg.AggregatorConfig.Aggregator.BlsServiceTaskTimeout)
382-
// FIXME(marian): When this errors, should we retry initializing new task? Logging fatal for now.
399+
err := agg.InitializeNewTaskRetryable(batchIndex, taskCreatedBlock, quorumNums, quorumThresholdPercentages, agg.AggregatorConfig.Aggregator.BlsServiceTaskTimeout)
383400
if err != nil {
384401
agg.logger.Fatalf("BLS aggregation service error when initializing new task: %s", err)
385402
}
@@ -393,15 +410,17 @@ func (agg *Aggregator) AddNewTask(batchMerkleRoot [32]byte, senderAddress [20]by
393410
// |---RETRYABLE---|
394411

395412
/*
413+
InitializeNewTaskRetryable
414+
Initialize a new task in the BLS Aggregation service
396415
- Errors:
397416
Permanent:
398417
- TaskAlreadyInitializedError (Permanent): Task is already initialized in the BLS Aggregation service (https://github.com/Layr-Labs/eigensdk-go/blob/dev/services/bls_aggregation/blsagg.go#L27).
399418
Transient:
400419
- All others.
401-
- Retry times (3 retries): 12 sec (1 Blocks), 24 sec (2 Blocks), 48 sec (4 Blocks)
420+
- Retry times (3 retries): 1 sec, 2 sec, 4 sec
402421
*/
403-
func (agg *Aggregator) InitializeNewTask(batchIndex uint32, taskCreatedBlock uint32, quorumNums eigentypes.QuorumNums, quorumThresholdPercentages eigentypes.QuorumThresholdPercentages, timeToExpiry time.Duration) error {
404-
initilizeNewTask_func := func() error {
422+
func (agg *Aggregator) InitializeNewTaskRetryable(batchIndex uint32, taskCreatedBlock uint32, quorumNums eigentypes.QuorumNums, quorumThresholdPercentages eigentypes.QuorumThresholdPercentages, timeToExpiry time.Duration) error {
423+
initializeNewTask_func := func() error {
405424
err := agg.blsAggregationService.InitializeNewTask(batchIndex, taskCreatedBlock, quorumNums, quorumThresholdPercentages, timeToExpiry)
406425
if err != nil {
407426
// Task is already initialized
@@ -411,7 +430,7 @@ func (agg *Aggregator) InitializeNewTask(batchIndex uint32, taskCreatedBlock uin
411430
}
412431
return err
413432
}
414-
return retry.Retry(initilizeNewTask_func, retry.MinDelayChain, retry.RetryFactor, retry.NumRetries, retry.MaxIntervalChain, retry.MaxElapsedTime)
433+
return retry.Retry(initializeNewTask_func, retry.MinDelay, retry.RetryFactor, retry.NumRetries, retry.MaxInterval, retry.MaxElapsedTime)
415434
}
416435

417436
// Long-lived goroutine that periodically checks and removes old Tasks from stored Maps
@@ -421,7 +440,7 @@ func (agg *Aggregator) ClearTasksFromMaps() {
421440
defer func() {
422441
err := recover() //stops panics
423442
if err != nil {
424-
agg.logger.Error("Recovered from panic", "err", err)
443+
agg.logger.Error("ClearTasksFromMaps Recovered from panic", "err", err)
425444
}
426445
}()
427446

@@ -441,6 +460,8 @@ func (agg *Aggregator) ClearTasksFromMaps() {
441460
agg.logger.Warn("No old tasks found")
442461
continue // Retry in the next iteration
443462
}
463+
agg.taskMutex.Lock()
464+
agg.AggregatorConfig.BaseConfig.Logger.Info("- Locked Resources: Cleaning finalized tasks")
444465

445466
taskIdxToDelete := agg.batchesIdxByIdentifierHash[*oldTaskIdHash]
446467
agg.logger.Info("Old task found", "taskIndex", taskIdxToDelete)
@@ -458,6 +479,8 @@ func (agg *Aggregator) ClearTasksFromMaps() {
458479
}
459480
}
460481
lastIdxDeleted = taskIdxToDelete
482+
agg.taskMutex.Unlock()
483+
agg.AggregatorConfig.BaseConfig.Logger.Info("- Unlocked Resources: Cleaning finalized tasks")
461484
agg.AggregatorConfig.BaseConfig.Logger.Info("Done cleaning finalized tasks from maps")
462485
}
463486
}

0 commit comments

Comments
 (0)