Skip to content

Commit b010ba8

Browse files
craig[bot]yuzefovichDarrylWongjbowens
committed
157804: backfill: skip external tenant under deadlock in vector backfill test r=yuzefovich a=yuzefovich `TestVectorIndexMergingDuringBackfillWithPrefix` is rather intensive, and we've seen a couple of failures due to general overload in the external-process mode under deadlock, so we'll just disable that mode under deadlock. Alternative idea I had was to exempt the tenant from rate limiting, but I wasn't able to reproduce the failure on gceworker, so I couldn't verify that it worked and decided to not go with it. Fixes: #156995. Release note: None 157919: roachprod: fix service registry unit test name collision r=golgeek a=DarrylWong We were previously generating cluster names with Uint32, which is not sufficient enough to prevent naming collisions after 1000 iterations. Instead, use a RandString of length 10. Fixes: #157884 Release note: none 157920: storage,server: add debug separated value retrieval profile endpoint r=RaduBerinde a=jbowens Add a new debug endpoint that returns a text-formatted representation of all the stack traces that performed a retrieval of a separated value while the profile was being collected. This provides better observability into what code paths are suffering more expensive value retrievals. Epic: none Informs: #146575 Release note: none Co-authored-by: Yahor Yuzefovich <[email protected]> Co-authored-by: DarrylWong <[email protected]> Co-authored-by: Jackson Owens <[email protected]>
4 parents 0b875de + de6fa3a + 68a2388 + 0ede4a1 commit b010ba8

File tree

5 files changed

+51
-1
lines changed

5 files changed

+51
-1
lines changed

pkg/roachprod/install/services_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ func (t *serviceRegistryTest) makeVM(clusterName string, i int) vm.VM {
138138
}
139139

140140
func (t *serviceRegistryTest) newCluster(nodeCount int) *SyncedCluster {
141-
clusterName := fmt.Sprintf("cluster-%d", t.rng.Uint32())
141+
clusterName := fmt.Sprintf("cluster-%s", randutil.RandString(t.rng, 10, randutil.PrintableKeyAlphabet))
142142
c := &SyncedCluster{
143143
Cluster: cloud.Cluster{
144144
Name: clusterName,

pkg/server/debug/server.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
package debug
77

88
import (
9+
"context"
910
"expvar"
1011
"fmt"
1112
"io"
1213
"net/http"
1314
"net/http/pprof"
1415
"strconv"
1516
"strings"
17+
"time"
1618

1719
"github.com/cockroachdb/cockroach/pkg/base/serverident"
1820
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
@@ -277,6 +279,29 @@ func (ds *Server) RegisterEngines(engines []storage.Engine) error {
277279
fmt.Fprintf(w, "error analyzing LSM at %s: %v", dir, err)
278280
}
279281
})
282+
ds.mux.HandleFunc(fmt.Sprintf("/debug/storage/%d/profiles/separated-value-retrievals", storeID),
283+
func(w http.ResponseWriter, req *http.Request) {
284+
dur := 30 * time.Second
285+
if secsStr := req.Header.Get("seconds"); secsStr != "" {
286+
secs, err := strconv.ParseInt(secsStr, 10, 64)
287+
if err != nil {
288+
http.Error(w, "error parsing seconds", http.StatusBadRequest)
289+
return
290+
}
291+
dur = time.Duration(secs) * time.Second
292+
}
293+
ctx := req.Context()
294+
ctx, cancel := context.WithTimeout(ctx, dur)
295+
defer cancel()
296+
profile, err := eng.ProfileSeparatedValueRetrievals(ctx)
297+
if err != nil {
298+
http.Error(w, "error profiling separated value retrievals", http.StatusInternalServerError)
299+
return
300+
}
301+
w.Header().Set("Content-Type", "text/plain")
302+
w.WriteHeader(http.StatusOK)
303+
fmt.Fprint(w, profile.String())
304+
})
280305
}
281306
return nil
282307
}

pkg/sql/backfill/backfill_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,13 @@ func TestVectorIndexMergingDuringBackfillWithPrefix(t *testing.T) {
346346
blockBackfill := make(chan struct{})
347347
backfillBlocked := make(chan struct{})
348348

349+
var testTenant base.DefaultTestTenantOptions
350+
if syncutil.DeadlockEnabled {
351+
// The cluster can get overloaded under deadlock with external process
352+
// mode.
353+
testTenant = base.TestSkipForExternalProcessMode()
354+
}
355+
349356
ctx := context.Background()
350357
srv, db, _ := serverutils.StartServer(t, base.TestServerArgs{
351358
Knobs: base.TestingKnobs{
@@ -357,6 +364,7 @@ func TestVectorIndexMergingDuringBackfillWithPrefix(t *testing.T) {
357364
},
358365
},
359366
},
367+
DefaultTestTenant: testTenant,
360368
})
361369
defer srv.Stopper().Stop(ctx)
362370
sqlDB := sqlutils.MakeSQLRunner(db)

pkg/storage/engine.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/cockroachdb/cockroach/pkg/util/uuid"
2727
"github.com/cockroachdb/errors"
2828
"github.com/cockroachdb/pebble"
29+
"github.com/cockroachdb/pebble/metrics"
2930
"github.com/cockroachdb/pebble/rangekey"
3031
"github.com/cockroachdb/pebble/vfs"
3132
"github.com/cockroachdb/redact"
@@ -926,6 +927,9 @@ type Engine interface {
926927
Capacity() (roachpb.StoreCapacity, error)
927928
// Properties returns the low-level properties for the engine's underlying storage.
928929
Properties() roachpb.StoreProperties
930+
// ProfileSeparatedValueRetrievals collects a profile of the engine's
931+
// separated value retrievals. It stops when the context is done.
932+
ProfileSeparatedValueRetrievals(ctx context.Context) (*metrics.ValueRetrievalProfile, error)
929933
// Compact forces compaction over the entire database.
930934
Compact(ctx context.Context) error
931935
// Env returns the filesystem environment used by the Engine.

pkg/storage/pebble.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2136,6 +2136,19 @@ func (p *Pebble) GetEnvStats() (*fs.EnvStats, error) {
21362136
return stats, nil
21372137
}
21382138

2139+
// ProfileSeparatedValueRetrievals collects a profile of the engine's
2140+
// separated value retrievals. It stops when the context is done.
2141+
func (p *Pebble) ProfileSeparatedValueRetrievals(
2142+
ctx context.Context,
2143+
) (*metrics.ValueRetrievalProfile, error) {
2144+
stop, err := p.db.RecordSeparatedValueRetrievals()
2145+
if err != nil {
2146+
return nil, err
2147+
}
2148+
<-ctx.Done()
2149+
return stop(), nil
2150+
}
2151+
21392152
// GetAuxiliaryDir implements the Engine interface.
21402153
func (p *Pebble) GetAuxiliaryDir() string {
21412154
return p.auxDir

0 commit comments

Comments
 (0)