@@ -8,10 +8,7 @@ package ycsb
8
8
9
9
import (
10
10
"context"
11
- "encoding/binary"
12
11
"fmt"
13
- "hash"
14
- "hash/fnv"
15
12
"math"
16
13
"math/rand/v2"
17
14
"strings"
@@ -26,6 +23,7 @@ import (
26
23
"github.com/cockroachdb/cockroach/pkg/workload"
27
24
"github.com/cockroachdb/cockroach/pkg/workload/histogram"
28
25
"github.com/cockroachdb/cockroach/pkg/workload/workloadimpl"
26
+ "github.com/cockroachdb/crlib/crrand"
29
27
"github.com/cockroachdb/errors"
30
28
"github.com/jackc/pgx/v5"
31
29
"github.com/jackc/pgx/v5/pgconn"
@@ -89,17 +87,17 @@ type ycsb struct {
89
87
flags workload.Flags
90
88
connFlags * workload.ConnFlags
91
89
92
- timeString bool
93
- insertHash bool
94
- zeroPadding int
95
- insertStart int
96
- insertCount int
97
- recordCount int
98
- json bool
99
- families bool
100
- rmwInTxn bool
101
- sfu bool
102
- splits int
90
+ timeString bool
91
+ insertRandom bool
92
+ zeroPadding int
93
+ insertStart int
94
+ insertCount int
95
+ recordCount int
96
+ json bool
97
+ families bool
98
+ rmwInTxn bool
99
+ sfu bool
100
+ splits int
103
101
104
102
workload string
105
103
requestDistribution string
@@ -131,9 +129,15 @@ var ycsbMeta = workload.Meta{
131
129
`scan-freq` : {RuntimeOnly : true },
132
130
`read-modify-write-freq` : {RuntimeOnly : true },
133
131
}
132
+ g .flags .SetNormalizeFunc (func (flags * pflag.FlagSet , name string ) pflag.NormalizedName {
133
+ if name == `insert-hash` {
134
+ name = `insert-random`
135
+ }
136
+ return pflag .NormalizedName (name )
137
+ })
134
138
g .flags .BoolVar (& g .timeString , `time-string` , false , `Prepend field[0-9] data with current time in microsecond precision.` )
135
- g .flags .BoolVar (& g .insertHash , `insert-hash ` , true , `Key to be hashed or ordered.` )
136
- g .flags .IntVar (& g .zeroPadding , `zero-padding` , 1 , `Key using "insert-hash=false" has zeros padded to left to make this length of digits.` )
139
+ g .flags .BoolVar (& g .insertRandom , `insert-random ` , true , `Key to be pseudorandom or ordered.` )
140
+ g .flags .IntVar (& g .zeroPadding , `zero-padding` , 1 , `Key has zeros padded to left to make this length of digits.` )
137
141
g .flags .IntVar (& g .insertStart , `insert-start` , 0 , `Key to start initial sequential insertions from. (default 0)` )
138
142
g .flags .IntVar (& g .insertCount , `insert-count` , 10000 , `Number of rows to sequentially insert before beginning workload.` )
139
143
g .flags .IntVar (& g .recordCount , `record-count` , 0 , `Key to start workload insertions from. Must be >= insert-start + insert-count. (Default: insert-start + insert-count)` )
@@ -328,9 +332,13 @@ func (g *ycsb) Tables() []workload.Table {
328
332
Splits : workload .Tuples (
329
333
g .splits ,
330
334
func (splitIdx int ) []interface {} {
335
+ w := ycsbWorker {
336
+ config : g ,
337
+ prngPerm : crrand .MakePerm64 (RandomSeed .Seed ()),
338
+ }
331
339
step := math .MaxUint64 / uint64 (g .splits + 1 )
332
340
return []interface {}{
333
- keyNameFromHash (step * uint64 (splitIdx + 1 )),
341
+ w . buildKeyName (step * uint64 (splitIdx + 1 )),
334
342
}
335
343
},
336
344
),
@@ -342,7 +350,7 @@ func (g *ycsb) Tables() []workload.Table {
342
350
func (rowIdx int ) []interface {} {
343
351
w := ycsbWorker {
344
352
config : g ,
345
- hashFunc : fnv . New64 ( ),
353
+ prngPerm : crrand . MakePerm64 ( RandomSeed . Seed () ),
346
354
}
347
355
key := w .buildKeyName (uint64 (g .insertStart + rowIdx ))
348
356
// TODO(peter): Need to fill in FIELD here, rather than an empty JSONB
@@ -359,12 +367,6 @@ func (g *ycsb) Tables() []workload.Table {
359
367
const batchSize = 1000
360
368
usertable .InitialRows = workload.BatchedTuples {
361
369
NumBatches : (g .insertCount + batchSize - 1 ) / batchSize ,
362
- // If the key sequence is hashed, duplicates are possible. Hash
363
- // collisions are inevitable at large insert counts (they're at
364
- // least inevitable at ~1b rows). Marking that the keys may contain
365
- // duplicates will cause the data loader to use INSERT ... ON
366
- // CONFLICT DO NOTHING statements.
367
- MayContainDuplicates : ! g .insertHash ,
368
370
FillBatch : func (batchIdx int , cb coldata.Batch , _ * bufalloc.ByteAllocator ) {
369
371
rowBegin , rowEnd := batchIdx * batchSize , (batchIdx + 1 )* batchSize
370
372
if rowEnd > g .insertCount {
@@ -385,7 +387,7 @@ func (g *ycsb) Tables() []workload.Table {
385
387
386
388
w := ycsbWorker {
387
389
config : g ,
388
- hashFunc : fnv . New64 ( ),
390
+ prngPerm : crrand . MakePerm64 ( RandomSeed . Seed () ),
389
391
}
390
392
rng := rand .NewPCG (RandomSeed .Seed (), uint64 (batchIdx ))
391
393
@@ -577,7 +579,7 @@ func (g *ycsb) Ops(
577
579
requestGen : requestGen ,
578
580
scanLengthGen : scanLengthGen ,
579
581
rng : rng ,
580
- hashFunc : fnv . New64 ( ),
582
+ prngPerm : crrand . MakePerm64 ( RandomSeed . Seed () ),
581
583
}
582
584
ql .WorkerFns = append (ql .WorkerFns , w .run )
583
585
}
@@ -622,8 +624,7 @@ type ycsbWorker struct {
622
624
requestGen randGenerator // used to generate random keys for requests
623
625
scanLengthGen randGenerator // used to generate length of scan operations
624
626
rng * rand.Rand // used to generate random strings for the values
625
- hashFunc hash.Hash64
626
- hashBuf [binary .MaxVarintLen64 ]byte
627
+ prngPerm crrand.Perm64 // used to map the key index to a pseudorandom key
627
628
}
628
629
629
630
func (yw * ycsbWorker ) run (ctx context.Context ) error {
@@ -691,29 +692,12 @@ const (
691
692
readModifyWriteOp operation = `readModifyWrite`
692
693
)
693
694
694
- func (yw * ycsbWorker ) hashKey (key uint64 ) uint64 {
695
- yw .hashBuf = [binary .MaxVarintLen64 ]byte {} // clear hashBuf
696
- binary .PutUvarint (yw .hashBuf [:], key )
697
- yw .hashFunc .Reset ()
698
- if _ , err := yw .hashFunc .Write (yw .hashBuf [:]); err != nil {
699
- panic (err )
700
- }
701
- return yw .hashFunc .Sum64 ()
702
- }
703
-
704
695
func (yw * ycsbWorker ) buildKeyName (keynum uint64 ) string {
705
- if yw .config .insertHash {
706
- return keyNameFromHash (yw .hashKey (keynum ))
696
+ if yw .config .insertRandom {
697
+ // Use prngPerm to map the key index to a pseudorandom key.
698
+ keynum = yw .prngPerm .At (keynum )
707
699
}
708
- return keyNameFromOrder (keynum , yw .config .zeroPadding )
709
- }
710
-
711
- func keyNameFromHash (hashedKey uint64 ) string {
712
- return fmt .Sprintf ("user%d" , hashedKey )
713
- }
714
-
715
- func keyNameFromOrder (keynum uint64 , zeroPadding int ) string {
716
- return fmt .Sprintf ("user%0*d" , zeroPadding , keynum )
700
+ return fmt .Sprintf ("user%0*d" , yw .config .zeroPadding , keynum )
717
701
}
718
702
719
703
// Keys are chosen by first drawing from a Zipf distribution, hashing the drawn
0 commit comments