From c9f62dfb0f06b03748a0999595d1551d06d3d996 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 07:48:47 +0000 Subject: [PATCH 1/5] Initial plan From 1e43d1d8b211f6eb94cb28bb3cc83b64480f758d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 08:02:59 +0000 Subject: [PATCH 2/5] fix: add retry logic in validatePrevote and validatePrecommit to handle race condition Co-authored-by: tzdybal <1138893+tzdybal@users.noreply.github.com> --- consensus/common_test.go | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/consensus/common_test.go b/consensus/common_test.go index 4cf54b0b07..3a47d6be33 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -274,14 +274,28 @@ func signAddVotes( } func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStub, blockHash []byte) { - cs.rsMtx.RLock() - prevotes := cs.rs.Votes.Prevotes(round) - cs.rsMtx.RUnlock() pubKey, err := privVal.GetPubKey() require.NoError(t, err) address := pubKey.Address() + + // Retry logic to handle race condition where vote event is published before vote is added to state var vote *types.Vote - if vote = prevotes.GetByAddress(address); vote == nil { + for i := 0; i < 10; i++ { + cs.rsMtx.RLock() + prevotes := cs.rs.Votes.Prevotes(round) + vote = prevotes.GetByAddress(address) + cs.rsMtx.RUnlock() + + if vote != nil { + break + } + + if i < 9 { // Don't sleep on the last iteration + time.Sleep(1 * time.Millisecond) + } + } + + if vote == nil { panic("Failed to find prevote from validator") } if blockHash == nil { @@ -320,12 +334,26 @@ func validatePrecommit( votedBlockHash, lockedBlockHash []byte, ) { - precommits := cs.rs.Votes.Precommits(thisRound) pv, err := privVal.GetPubKey() require.NoError(t, err) address := pv.Address() + + // Retry logic to handle race condition where vote event is published before vote is added to state var vote *types.Vote - if vote = precommits.GetByAddress(address); vote == nil { + for i := 0; i < 10; i++ { + precommits := cs.rs.Votes.Precommits(thisRound) + vote = precommits.GetByAddress(address) + + if vote != nil { + break + } + + if i < 9 { // Don't sleep on the last iteration + time.Sleep(1 * time.Millisecond) + } + } + + if vote == nil { panic("Failed to find precommit from validator") } From f610e0681599e57bf6c88252d6ea752de6fe94c6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 10:38:52 +0000 Subject: [PATCH 3/5] fix: improve race condition handling in validatePrevote and validatePrecommit with retry logic Co-authored-by: tzdybal <1138893+tzdybal@users.noreply.github.com> --- consensus/common_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/consensus/common_test.go b/consensus/common_test.go index 3a47d6be33..d512c8f8b3 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -278,9 +278,9 @@ func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStu require.NoError(t, err) address := pubKey.Address() - // Retry logic to handle race condition where vote event is published before vote is added to state + // Simple retry logic to handle race condition var vote *types.Vote - for i := 0; i < 10; i++ { + for i := 0; i < 20; i++ { cs.rsMtx.RLock() prevotes := cs.rs.Votes.Prevotes(round) vote = prevotes.GetByAddress(address) @@ -290,9 +290,8 @@ func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStu break } - if i < 9 { // Don't sleep on the last iteration - time.Sleep(1 * time.Millisecond) - } + // Brief delay between retries + time.Sleep(1 * time.Millisecond) } if vote == nil { @@ -338,19 +337,20 @@ func validatePrecommit( require.NoError(t, err) address := pv.Address() - // Retry logic to handle race condition where vote event is published before vote is added to state + // Simple retry logic to handle race condition var vote *types.Vote - for i := 0; i < 10; i++ { + for i := 0; i < 20; i++ { + cs.rsMtx.RLock() precommits := cs.rs.Votes.Precommits(thisRound) vote = precommits.GetByAddress(address) + cs.rsMtx.RUnlock() if vote != nil { break } - if i < 9 { // Don't sleep on the last iteration - time.Sleep(1 * time.Millisecond) - } + // Brief delay between retries + time.Sleep(1 * time.Millisecond) } if vote == nil { From 649ddccdc3e6b6822ad5ebe65a6094372faba2b7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 10:52:28 +0000 Subject: [PATCH 4/5] fix: use GetRoundState method instead of direct mutex access in validation functions Co-authored-by: tzdybal <1138893+tzdybal@users.noreply.github.com> --- consensus/common_test.go | 49 +++++++++++++--------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/consensus/common_test.go b/consensus/common_test.go index d512c8f8b3..09a6423478 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -274,25 +274,16 @@ func signAddVotes( } func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStub, blockHash []byte) { + // Wait for vote to be fully processed into consensus state + time.Sleep(10 * time.Millisecond) + + // Use GetRoundState() which may be safer than direct mutex access + rs := cs.GetRoundState() + prevotes := rs.Votes.Prevotes(round) pubKey, err := privVal.GetPubKey() require.NoError(t, err) address := pubKey.Address() - - // Simple retry logic to handle race condition - var vote *types.Vote - for i := 0; i < 20; i++ { - cs.rsMtx.RLock() - prevotes := cs.rs.Votes.Prevotes(round) - vote = prevotes.GetByAddress(address) - cs.rsMtx.RUnlock() - - if vote != nil { - break - } - - // Brief delay between retries - time.Sleep(1 * time.Millisecond) - } + vote := prevotes.GetByAddress(address) if vote == nil { panic("Failed to find prevote from validator") @@ -308,6 +299,8 @@ func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStu } } + + func validateLastPrecommit(t *testing.T, cs *State, privVal *validatorStub, blockHash []byte) { cs.rsMtx.RLock() votes := cs.rs.LastCommit @@ -333,25 +326,16 @@ func validatePrecommit( votedBlockHash, lockedBlockHash []byte, ) { + // Wait for vote to be fully processed into consensus state + time.Sleep(10 * time.Millisecond) + + // Use GetRoundState() which may be safer than direct mutex access + rs := cs.GetRoundState() + precommits := rs.Votes.Precommits(thisRound) pv, err := privVal.GetPubKey() require.NoError(t, err) address := pv.Address() - - // Simple retry logic to handle race condition - var vote *types.Vote - for i := 0; i < 20; i++ { - cs.rsMtx.RLock() - precommits := cs.rs.Votes.Precommits(thisRound) - vote = precommits.GetByAddress(address) - cs.rsMtx.RUnlock() - - if vote != nil { - break - } - - // Brief delay between retries - time.Sleep(1 * time.Millisecond) - } + vote := precommits.GetByAddress(address) if vote == nil { panic("Failed to find precommit from validator") @@ -367,7 +351,6 @@ func validatePrecommit( } } - rs := cs.GetRoundState() if lockedBlockHash == nil { if rs.LockedRound != lockRound || rs.LockedBlock != nil { panic(fmt.Sprintf( From 5baa7b519cb1780041518be05a7e7f09da5b66c6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 15 Sep 2025 11:15:50 +0000 Subject: [PATCH 5/5] fix: implement retry logic using GetRoundState() to resolve TestStateFullRound1 race condition Co-authored-by: tzdybal <1138893+tzdybal@users.noreply.github.com> --- consensus/common_test.go | 47 ++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/consensus/common_test.go b/consensus/common_test.go index 09a6423478..5a8dfe134a 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -274,16 +274,25 @@ func signAddVotes( } func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStub, blockHash []byte) { - // Wait for vote to be fully processed into consensus state - time.Sleep(10 * time.Millisecond) - - // Use GetRoundState() which may be safer than direct mutex access - rs := cs.GetRoundState() - prevotes := rs.Votes.Prevotes(round) pubKey, err := privVal.GetPubKey() require.NoError(t, err) address := pubKey.Address() - vote := prevotes.GetByAddress(address) + + // Simple retry logic to handle race condition + var vote *types.Vote + for i := 0; i < 30; i++ { + rs := cs.GetRoundState() + prevotes := rs.Votes.Prevotes(round) + vote = prevotes.GetByAddress(address) + + if vote != nil { + break + } + + if i < 29 { + time.Sleep(2 * time.Millisecond) + } + } if vote == nil { panic("Failed to find prevote from validator") @@ -326,16 +335,26 @@ func validatePrecommit( votedBlockHash, lockedBlockHash []byte, ) { - // Wait for vote to be fully processed into consensus state - time.Sleep(10 * time.Millisecond) - - // Use GetRoundState() which may be safer than direct mutex access - rs := cs.GetRoundState() - precommits := rs.Votes.Precommits(thisRound) pv, err := privVal.GetPubKey() require.NoError(t, err) address := pv.Address() - vote := precommits.GetByAddress(address) + + // Simple retry logic to handle race condition + var vote *types.Vote + var rs *cstypes.RoundState + for i := 0; i < 30; i++ { + rs = cs.GetRoundState() + precommits := rs.Votes.Precommits(thisRound) + vote = precommits.GetByAddress(address) + + if vote != nil { + break + } + + if i < 29 { + time.Sleep(2 * time.Millisecond) + } + } if vote == nil { panic("Failed to find precommit from validator")