Skip to content

Commit 261e785

Browse files
craig[bot]arulajmanirickystewartyuzefovichsumeerbhola
committed
105474: concurrency: hoist TxnMeta from {,un}replicatedLockInfo into the holder r=nvanbenschoten a=arulajmani Previously, we were storing the TxnMeta separately for both {,un}replicatedLockInfo. The lock table is quite dumb when it comes to replicated locks -- for good reason. As a result, tracking TxnMeta's for replicated locks isn't of much use, as we don't make use of it, and this patch does exactly that. This also allows us to hoist the TxnMeta object one level higher, into the holder struct. Epic: none Release note: None 107211: ci: add some retries for `git fetch`es r=rail a=rickystewart Rarely these can fail. Add retries. Closes cockroachdb#107087 Epic: none Releae note: None 107231: server: fix recently introduced bug r=yuzefovich a=yuzefovich Over in 609230c on `TestTenant.TracerI` we returned the function rather than the underlying tracer. Epic: None Release note: None 107249: storage: fix PebbleFileRegistry bug that drops entry on rollover r=jbowens a=sumeerbhola The writeToRegistryFile method first writes the new batch, containing file mappings, to the registry file, and then if the registry file is too big, creates a new registry file. The new registry file is populated with the contents of the map, which doesn't yet contain the edits in the batch, resulting in a loss of these edits when the file registry is reopened. This PR changes the logic to first rollover if the registry file is too big, and then writes the batch to the new file. This bug has existed since the record writer based registry was implemented cockroachdb@239377a. When it leads to a loss of a file mapping in the registry, it will be noticed by Pebble as a corruption (so not a silent failure) since the file corresponding to the mapping will be assumed to be unencrypted, but can't be successfully read as an unencrypted file. Since we have not seen this occur in production settings, we suspect that an observable mapping loss is rare because compactions typically rewrite the files in those lost mappings before the file registry is reopened. Epic: none Fixes: cockroachdb#106617 Release note: None 107259: changefeedccl: Treat drop descriptors as terminal r=miretskiy a=miretskiy Prior to this change, changefeed would sometimes treat droped descriptors (ErrDroppedDescriptor) as a terminal error, and sometimes it would be treated as a retryable error (though, upon retry, the error would be upgraded to terminal). This PR cleans up this logic and ensures that any "dropped descriptor" error is treated as terminal. Informs https://github.com/cockroachlabs/support/issues/2408 Release note: None Co-authored-by: Arul Ajmani <[email protected]> Co-authored-by: Ricky Stewart <[email protected]> Co-authored-by: Yahor Yuzefovich <[email protected]> Co-authored-by: sumeerbhola <[email protected]> Co-authored-by: Yevgeniy Miretskiy <[email protected]>
6 parents 5acb999 + 2e8016d + c096bd7 + ab7aa17 + 0e5d38a + 18ac60a commit 261e785

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1183
-1007
lines changed

build/teamcity-support.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,15 @@ get_upstream_branch() {
228228
}
229229

230230
changed_go_pkgs() {
231-
git fetch --quiet origin
231+
n=0
232+
until git fetch --quiet origin; do
233+
n=$((n+1))
234+
if [ "$n" -ge 3 ]; then
235+
echo "Could not fetch from GitHub"
236+
exit 1
237+
fi
238+
sleep 5
239+
done
232240
upstream_branch=$(get_upstream_branch)
233241
# Find changed packages, minus those that have been removed entirely. Note
234242
# that the three-dot notation means we are diffing against the merge-base of

build/teamcity/cockroach/ci/tests/maybe_stress.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,15 @@ source "$dir/teamcity-support.sh" # For $root, would_stress
88
source "$dir/teamcity-bazel-support.sh" # For run_bazel
99

1010
if would_stress; then
11-
git fetch origin master
11+
n=0
12+
until git fetch origin master; do
13+
n=$((n+1))
14+
if [ "$n" -ge 3 ]; then
15+
echo "Could not fetch from GitHub"
16+
exit 1
17+
fi
18+
sleep 5
19+
done
1220
tc_start_block "Run stress tests"
1321
run_bazel env BUILD_VCS_NUMBER="$BUILD_VCS_NUMBER" build/teamcity/cockroach/ci/tests/maybe_stress_impl.sh stress
1422
tc_end_block "Run stress tests"

build/teamcity/cockroach/ci/tests/maybe_stressrace.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,15 @@ source "$dir/teamcity-support.sh" # For $root, would_stress
88
source "$dir/teamcity-bazel-support.sh" # For run_bazel
99

1010
if would_stress; then
11-
git fetch origin master
11+
n=0
12+
until git fetch origin master; do
13+
n=$((n+1))
14+
if [ "$n" -ge 3 ]; then
15+
echo "Could not fetch from GitHub"
16+
exit 1
17+
fi
18+
sleep 5
19+
done
1220
tc_start_block "Run stress tests"
1321
run_bazel env BUILD_VCS_NUMBER="$BUILD_VCS_NUMBER" build/teamcity/cockroach/ci/tests/maybe_stress_impl.sh stressrace
1422
tc_end_block "Run stress tests"

pkg/ccl/changefeedccl/cdcevent/rowfetcher_cache.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ func refreshUDT(
109109
tableDesc, err = collection.ByIDWithLeased(txn).WithoutNonPublic().Get().Table(ctx, tableID)
110110
return err
111111
}); err != nil {
112+
if errors.Is(err, catalog.ErrDescriptorDropped) {
113+
// Dropped descriptors are a bad news.
114+
return nil, changefeedbase.WithTerminalError(err)
115+
}
116+
112117
// Manager can return all kinds of errors during chaos, but based on
113118
// its usage, none of them should ever be terminal.
114119
return nil, changefeedbase.MarkRetryableError(err)

pkg/ccl/changefeedccl/changefeed_dist.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ func fetchTableDescriptors(
154154
})
155155
}
156156
if err := sql.DescsTxn(ctx, execCfg, fetchSpans); err != nil {
157+
if errors.Is(err, catalog.ErrDescriptorDropped) {
158+
return nil, changefeedbase.WithTerminalError(err)
159+
}
157160
return nil, err
158161
}
159162
return targetDescs, nil

pkg/ccl/changefeedccl/schemafeed/schema_feed.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,14 @@ func (tf *schemaFeed) validateDescriptor(
658658
}
659659
// If a interesting type changed, then we just want to force the lease
660660
// manager to acquire the freshest version of the type.
661-
return tf.leaseMgr.AcquireFreshestFromStore(ctx, desc.GetID())
661+
if err := tf.leaseMgr.AcquireFreshestFromStore(ctx, desc.GetID()); err != nil {
662+
err = errors.Wrapf(err, "could not acquire type descriptor %d lease", desc.GetID())
663+
if errors.Is(err, catalog.ErrDescriptorDropped) { // That's pretty fatal.
664+
err = changefeedbase.WithTerminalError(err)
665+
}
666+
return err
667+
}
668+
return nil
662669
case catalog.TableDescriptor:
663670
if err := changefeedvalidators.ValidateTable(tf.targets, desc, tf.tolerances); err != nil {
664671
return err

0 commit comments

Comments
 (0)