Skip to content

Commit 2794f07

Browse files
committed
roachtest/vecindex: Always download datasets when testing
Previously, the vecindex roachtest would use the vecann default of attempting to reuse cached dataset files. This led to a situation where a test runner apparently cached a truncated file, causing the test to fail repeatedly when run from that runner. This patch changes the behavior to always download the dataset, so the test doesn't repeatedly flake.

Fixes: #157119

Release note: None
1 parent 9510b99 commit 2794f07

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

pkg/cmd/roachtest/tests/vecindex.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,7 @@ func runVectorIndex(ctx context.Context, t test.Test, c cluster.Cluster, opts ve
253253
t.L().Printf("Loading dataset %s", opts.dataset)
254254
loader := vecann.DatasetLoader{
255255
DatasetName: opts.dataset,
256+
ResetCache: true,
256257
OnProgress: func(ctx context.Context, format string, args ...any) {
257258
t.L().Printf(format, args...)
258259
},

pkg/workload/vecann/datasets.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,8 @@ type DatasetLoader struct {
9090
// CacheFolder is the path to the temporary folder where datasets will be
9191
// cached. It defaults to ~/.cache/workload-datasets.
9292
CacheFolder string
93+
// ResetCache indicates that the cache should be re-populated.
94+
ResetCache bool
9395

9496
// OnProgress logs the progress of the loading process.
9597
OnProgress func(ctx context.Context, format string, args ...any)
@@ -128,12 +130,12 @@ func (dl *DatasetLoader) loadFiles(ctx context.Context) error {
128130
neighbors := fmt.Sprintf("%s/%s-neighbors-%s.ibin", baseDir, baseName, metric)
129131

130132
// Download test and neighbors files if missing.
131-
if !fileExists(test) {
133+
if dl.ResetCache || !fileExists(test) {
132134
if err := dl.downloadAndUnzip(ctx, baseName, baseName+"-test.fbin.zip", test); err != nil {
133135
return err
134136
}
135137
}
136-
if !fileExists(neighbors) {
138+
if dl.ResetCache || !fileExists(neighbors) {
137139
fileName := baseName + "-neighbors-" + metric + ".ibin.zip"
138140
if err := dl.downloadAndUnzip(ctx, baseName, fileName, neighbors); err != nil {
139141
return err
@@ -179,7 +181,7 @@ func (dl *DatasetLoader) downloadTrainFiles(
179181
// First, check for files in the cache.
180182
onlyFileName := fmt.Sprintf("%s/%s.fbin", baseDir, baseName)
181183
firstPartName := fmt.Sprintf("%s/%s-1.fbin", baseDir, baseName)
182-
if !fileExists(onlyFileName) && !fileExists(firstPartName) {
184+
if dl.ResetCache || (!fileExists(onlyFileName) && !fileExists(firstPartName)) {
183185
// No files in cache, download them.
184186
partNum := 0
185187
for {

0 commit comments

Comments
 (0)