Skip to content

Commit 088b66e

Browse files
craig[bot] and jeffswenson committed
Merge #144818
144818: tpcc: use explicit decimal when initializing items r=jeffswenson a=jeffswenson

Previously, TPCC relied on a float -> decimal -> decimal(precision, scale) cast to import data. In rare cases this could lead to incorrectly normalized values like 1E2 instead of 100. Now, the TPCC workload generator creates decimals with the correct scale.

Informs: #144474
Informs: #143870
Fixes: #143913
Fixes: #144289

Release note: none

Co-authored-by: Jeff Swenson <[email protected]>
2 parents 027bdb4 + 326b1e7 commit 088b66e

File tree

10 files changed

+89
-102
lines changed

10 files changed

+89
-102
lines changed

pkg/ccl/workloadccl/allccl/all_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,11 @@ func hashTableInitialData(
247247
binary.LittleEndian.PutUint64(scratch[:8], uint64(colTime[i].UnixNano()))
248248
_, _ = h.Write(scratch[:8])
249249
}
250+
case types.DecimalFamily:
251+
colDecimal := col.Decimal()
252+
for i := 0; i < b.Length(); i++ {
253+
_, _ = h.Write([]byte(colDecimal[i].String()))
254+
}
250255
default:
251256
return errors.Errorf(`unhandled type %s`, col.Type())
252257
}
@@ -282,7 +287,7 @@ func TestDeterministicInitialData(t *testing.T) {
282287
`roachmart`: 0xda5e73423dbdb2d9,
283288
`sqlsmith`: 0xcbf29ce484222325,
284289
`startrek`: 0xa0249fbdf612734c,
285-
`tpcc`: 0xcccced25deea244e,
290+
`tpcc`: 0xccfecd06eed59975,
286291
`tpch`: 0xcd2abbd021ed895d,
287292
`ycsb`: 0x0e6012ee6491a0fb,
288293
}

pkg/cmd/roachtest/tests/mixed_version_import.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ import (
2020

2121
func registerImportMixedVersions(r registry.Registry) {
2222
r.Add(registry.TestSpec{
23+
// TODO(jeffswenson): re-enable mixed version import once #144818 is
24+
// backported. This test is fragile because it expects the special
25+
// 'workload://' fixtures to be deterministic across versions. A better
26+
// version of this test would use actual CSV fixtures.
27+
Skip: "Issue #143870",
2328
Name: "import/mixed-versions",
2429
Owner: registry.OwnerSQLQueries,
2530
Cluster: r.MakeClusterSpec(4),

pkg/sql/importer/read_import_workload.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ func makeDatumFromColOffset(
132132
// MakeDTimestamp here and just directly construct it.
133133
return alloc.NewDTimestampTZ(tree.DTimestampTZ{Time: col.Timestamp()[rowIdx]}), nil
134134
}
135+
case types.DecimalFamily:
136+
return alloc.NewDDecimal(tree.DDecimal{Decimal: col.Decimal()[rowIdx]}), nil
135137
}
136138
return nil, errors.Errorf(
137139
`don't know how to interpret %s column as %s`, col.Type(), hint)

pkg/testutils/lint/lint_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2090,8 +2090,6 @@ func TestLint(t *testing.T) {
20902090
stream.GrepNot(`pkg/cmd/mirror/go/mirror.go`),
20912091
// As above, the bazel build tag has an impact here.
20922092
stream.GrepNot(`pkg/testutils/docker/single_node_docker_test.go`),
2093-
// TODO(#143870): remove uses of this package.
2094-
stream.GrepNot(`"golang.org/x/exp/rand" is deprecated`),
20952093
}
20962094
for analyzerName, config := range nogoConfig {
20972095
if !staticcheckCheckNameRe.MatchString(analyzerName) {

pkg/workload/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ go_library(
2828
"//pkg/util/syncutil",
2929
"//pkg/util/timeutil",
3030
"//pkg/workload/histogram",
31+
"@com_github_cockroachdb_apd_v3//:apd",
3132
"@com_github_cockroachdb_errors//:errors",
3233
"@com_github_datadog_datadog_api_client_go_v2//api/datadog",
3334
"@com_github_datadog_datadog_api_client_go_v2//api/datadogV1",

pkg/workload/tpcc/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ go_library(
4040
"//pkg/workload/histogram",
4141
"//pkg/workload/histogram/exporter",
4242
"//pkg/workload/workloadimpl",
43+
"@com_github_cockroachdb_apd_v3//:apd",
4344
"@com_github_cockroachdb_cockroach_go_v2//crdb/crdbpgxv5",
4445
"@com_github_cockroachdb_errors//:errors",
4546
"@com_github_codahale_hdrhistogram//:hdrhistogram",
@@ -50,7 +51,6 @@ go_library(
5051
"@com_github_prometheus_client_golang//prometheus",
5152
"@com_github_prometheus_client_golang//prometheus/promauto",
5253
"@com_github_spf13_pflag//:pflag",
53-
"@org_golang_x_exp//rand",
5454
"@org_golang_x_sync//errgroup",
5555
],
5656
)

pkg/workload/tpcc/generate.go

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ import (
99
"math/rand/v2"
1010
"strconv"
1111

12+
"github.com/cockroachdb/apd/v3"
1213
"github.com/cockroachdb/cockroach/pkg/col/coldata"
1314
"github.com/cockroachdb/cockroach/pkg/sql/types"
1415
"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
1516
"github.com/cockroachdb/cockroach/pkg/util/uuid"
1617
"github.com/cockroachdb/cockroach/pkg/workload"
17-
randold "golang.org/x/exp/rand"
1818
)
1919

2020
// These constants are all set by the spec - they're not knobs. Don't change
@@ -34,20 +34,20 @@ const (
3434
maxOrderLinesPerOrder = 15
3535

3636
originalString = "ORIGINAL"
37-
wYtd = 300000.00
38-
ytd = 30000.00
3937
nextOrderID = 3001
40-
creditLimit = 50000.00
41-
balance = -10.00
42-
ytdPayment = 10.00
4338
paymentCount = 1
4439
deliveryCount = 0
4540
)
4641

4742
var (
48-
middleName = []byte(`OE`)
49-
goodCredit = []byte("GC")
50-
badCredit = []byte("BC")
43+
middleName = []byte(`OE`)
44+
goodCredit = []byte("GC")
45+
badCredit = []byte("BC")
46+
wYtd = makeDecimal(300000.00, 2)
47+
ytd = makeDecimal(30000.00, 2)
48+
creditLimit = makeDecimal(50000.00, 2)
49+
balance = makeDecimal(-10.00, 2)
50+
ytdPayment = makeDecimal(10.00, 2)
5151
)
5252

5353
// These constants configure how we split the tables when splitting is enabled.
@@ -58,15 +58,14 @@ const (
5858

5959
type generateLocals struct {
6060
rng tpccRand
61-
rngOld tpccRandOld
6261
uuidAlloc uuid.UUID
6362
}
6463

6564
var itemTypes = []*types.T{
6665
types.Int,
6766
types.Int,
6867
types.Bytes,
69-
types.Float,
68+
types.MakeDecimal(5, 2),
7069
types.Bytes,
7170
}
7271

@@ -80,9 +79,9 @@ func (w *tpcc) tpccItemInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufalloc
8079

8180
cb.Reset(itemTypes, 1, coldata.StandardColumnFactory)
8281
cb.ColVec(0).Int64()[0] = int64(iID)
83-
cb.ColVec(1).Int64()[0] = randInt(l.rng.Rand, 1, 10000) // im_id: "Image ID associated to Item"
84-
cb.ColVec(2).Bytes().Set(0, randAStringInitialDataOnly(&l.rng, &ao, a, 14, 24)) // name
85-
cb.ColVec(3).Float64()[0] = float64(randInt(l.rng.Rand, 100, 10000)) / float64(100) // price
82+
cb.ColVec(1).Int64()[0] = randInt(l.rng.Rand, 1, 10000) // im_id: "Image ID associated to Item"
83+
cb.ColVec(2).Bytes().Set(0, randAStringInitialDataOnly(&l.rng, &ao, a, 14, 24)) // name
84+
cb.ColVec(3).Decimal()[0] = randDecimal(l.rng.Rand, 1.000, 100.00, 2) // price
8685
cb.ColVec(4).Bytes().Set(0, randOriginalStringInitialDataOnly(&l.rng, &ao, a))
8786
}
8887

@@ -107,8 +106,8 @@ var warehouseTypes = []*types.T{
107106
types.Bytes,
108107
types.Bytes,
109108
types.Bytes,
110-
types.Float,
111-
types.Float,
109+
types.MakeDecimal(4, 4),
110+
types.MakeDecimal(12, 2),
112111
}
113112

114113
func (w *tpcc) tpccWarehouseInitialRowBatch(
@@ -130,8 +129,8 @@ func (w *tpcc) tpccWarehouseInitialRowBatch(
130129
cb.ColVec(4).Bytes().Set(0, []byte(strconv.FormatInt(randInt(l.rng.Rand, 10, 20), 10))) // city
131130
cb.ColVec(5).Bytes().Set(0, randStateInitialDataOnly(&l.rng, &lo, a))
132131
cb.ColVec(6).Bytes().Set(0, randZipInitialDataOnly(&l.rng, &no, a))
133-
cb.ColVec(7).Float64()[0] = randTax(l.rng.Rand)
134-
cb.ColVec(8).Float64()[0] = wYtd
132+
cb.ColVec(7).Decimal()[0] = randTax(l.rng.Rand)
133+
cb.ColVec(8).Decimal()[0] = wYtd
135134
}
136135

137136
func (w *tpcc) tpccWarehouseStats() []workload.JSONStatistic {
@@ -236,8 +235,8 @@ var districtTypes = []*types.T{
236235
types.Bytes,
237236
types.Bytes,
238237
types.Bytes,
239-
types.Float,
240-
types.Float,
238+
types.MakeDecimal(4, 4),
239+
types.MakeDecimal(12, 2),
241240
types.Int,
242241
}
243242

@@ -263,8 +262,8 @@ func (w *tpcc) tpccDistrictInitialRowBatch(
263262
cb.ColVec(5).Bytes().Set(0, randAStringInitialDataOnly(&l.rng, &ao, a, 10, 20)) // city
264263
cb.ColVec(6).Bytes().Set(0, randStateInitialDataOnly(&l.rng, &lo, a))
265264
cb.ColVec(7).Bytes().Set(0, randZipInitialDataOnly(&l.rng, &no, a))
266-
cb.ColVec(8).Float64()[0] = randTax(l.rng.Rand)
267-
cb.ColVec(9).Float64()[0] = ytd
265+
cb.ColVec(8).Decimal()[0] = randTax(l.rng.Rand)
266+
cb.ColVec(9).Decimal()[0] = ytd
268267
cb.ColVec(10).Int64()[0] = nextOrderID
269268
}
270269

@@ -305,10 +304,10 @@ var customerTypes = []*types.T{
305304
types.Bytes,
306305
types.Timestamp,
307306
types.Bytes,
308-
types.Float,
309-
types.Float,
310-
types.Float,
311-
types.Float,
307+
types.MakeDecimal(12, 2),
308+
types.MakeDecimal(4, 4),
309+
types.MakeDecimal(12, 2),
310+
types.MakeDecimal(12, 2),
312311
types.Int,
313312
types.Int,
314313
types.Bytes,
@@ -359,10 +358,10 @@ func (w *tpcc) tpccCustomerInitialRowBatch(
359358
cb.ColVec(11).Bytes().Set(0, randNStringInitialDataOnly(&l.rng, &no, a, 16, 16)) // phone number
360359
cb.ColVec(12).Timestamp()[0] = w.nowTime
361360
cb.ColVec(13).Bytes().Set(0, credit)
362-
cb.ColVec(14).Float64()[0] = creditLimit
363-
cb.ColVec(15).Float64()[0] = float64(randInt(l.rng.Rand, 0, 5000)) / float64(10000.0) // discount
364-
cb.ColVec(16).Float64()[0] = balance
365-
cb.ColVec(17).Float64()[0] = ytdPayment
361+
cb.ColVec(14).Decimal()[0] = creditLimit
362+
cb.ColVec(15).Decimal()[0] = randDecimal(l.rng.Rand, 0, 0.5000, 4) // discount
363+
cb.ColVec(16).Decimal()[0] = balance
364+
cb.ColVec(17).Decimal()[0] = ytdPayment
366365
cb.ColVec(18).Int64()[0] = paymentCount
367366
cb.ColVec(19).Int64()[0] = deliveryCount
368367
cb.ColVec(20).Bytes().Set(0, randAStringInitialDataOnly(&l.rng, &ao, a, 300, 500)) // data
@@ -410,7 +409,7 @@ var historyTypes = []*types.T{
410409
types.Int,
411410
types.Int,
412411
types.Timestamp,
413-
types.Float,
412+
types.MakeDecimal(6, 2),
414413
types.Bytes,
415414
}
416415

@@ -444,7 +443,7 @@ func (w *tpcc) tpccHistoryInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufal
444443
cb.ColVec(4).Int64()[0] = int64(dID)
445444
cb.ColVec(5).Int64()[0] = int64(wID)
446445
cb.ColVec(6).Timestamp()[0] = w.nowTime
447-
cb.ColVec(7).Float64()[0] = 10.00
446+
cb.ColVec(7).Decimal()[0] = makeDecimal(10.00, 2)
448447
cb.ColVec(8).Bytes().Set(0, randAStringInitialDataOnly(&l.rng, &ao, a, 12, 24))
449448
}
450449

@@ -482,8 +481,8 @@ func (w *tpcc) tpccOrderInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo
482481

483482
// NB: numOrderLines is not allowed to use precomputed random data, make sure
484483
// it stays that way. See 4.3.2.1.
485-
l.rngOld.Seed(RandomSeed.Seed() + uint64(rowIdx))
486-
numOrderLines := randIntOld(l.rngOld.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder)
484+
l.rng.Rand = rand.New(rand.NewPCG(RandomSeed.Seed(), uint64(rowIdx)))
485+
numOrderLines := randInt(l.rng.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder)
487486

488487
oID := (rowIdx % numOrdersPerDistrict) + 1
489488
dID := ((rowIdx / numOrdersPerDistrict) % numDistrictsPerWarehouse) + 1
@@ -502,7 +501,7 @@ func (w *tpcc) tpccOrderInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo
502501
// We need a random permutation of customers that stable for all orders in a
503502
// district, so use the district ID to seed the random permutation.
504503
w.randomCIDsCache.values[dID] = make([]int, numCustomersPerDistrict)
505-
for i, cID := range randold.New(randold.NewSource(uint64(dID))).Perm(numCustomersPerDistrict) {
504+
for i, cID := range rand.New(rand.NewPCG(uint64(dID), 0)).Perm(numCustomersPerDistrict) {
506505
w.randomCIDsCache.values[dID][i] = cID + 1
507506
}
508507
}
@@ -514,7 +513,7 @@ func (w *tpcc) tpccOrderInitialRowBatch(rowIdx int, cb coldata.Batch, a *bufallo
514513
var carrierID int64
515514
if oID < 2101 {
516515
carrierSet = true
517-
carrierID = randIntOld(l.rngOld.Rand, 1, 10)
516+
carrierID = randInt(l.rng.Rand, 1, 10)
518517
}
519518

520519
cb.Reset(orderTypes, 1, coldata.StandardColumnFactory)
@@ -591,7 +590,7 @@ var orderLineTypes = []*types.T{
591590
types.Int,
592591
types.Timestamp,
593592
types.Int,
594-
types.Float,
593+
types.MakeDecimal(6, 2),
595594
types.Bytes,
596595
}
597596

@@ -603,15 +602,15 @@ func (w *tpcc) tpccOrderLineInitialRowBatch(
603602

604603
// NB: numOrderLines is not allowed to use precomputed random data, make sure
605604
// it stays that way. See 4.3.2.1.
606-
l.rngOld.Seed(RandomSeed.Seed() + uint64(orderRowIdx))
607-
numOrderLines := int(randIntOld(l.rngOld.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder))
605+
l.rng.Rand = rand.New(rand.NewPCG(RandomSeed.Seed(), uint64(orderRowIdx)))
606+
numOrderLines := int(randInt(l.rng.Rand, minOrderLinesPerOrder, maxOrderLinesPerOrder))
608607

609608
// NB: There is one batch of order_line rows per order
610609
oID := (orderRowIdx % numOrdersPerDistrict) + 1
611610
dID := ((orderRowIdx / numOrdersPerDistrict) % numDistrictsPerWarehouse) + 1
612611
wID := (orderRowIdx / numOrdersPerWarehouse)
613612

614-
ao := aCharsOffset(l.rngOld.Intn(len(aCharsAlphabet)))
613+
ao := aCharsOffset(l.rng.IntN(len(aCharsAlphabet)))
615614
cb.Reset(orderLineTypes, numOrderLines, coldata.StandardColumnFactory)
616615
olOIDCol := cb.ColVec(0).Int64()
617616
olDIDCol := cb.ColVec(1).Int64()
@@ -623,27 +622,27 @@ func (w *tpcc) tpccOrderLineInitialRowBatch(
623622
olDeliveryD.Nulls().UnsetNulls()
624623
olDeliveryDCol := olDeliveryD.Timestamp()
625624
olQuantityCol := cb.ColVec(7).Int64()
626-
olAmountCol := cb.ColVec(8).Float64()
625+
olAmountCol := cb.ColVec(8).Decimal()
627626
olDistInfoCol := cb.ColVec(9).Bytes()
628627

629628
olDistInfoCol.Reset()
630629
for rowIdx := 0; rowIdx < numOrderLines; rowIdx++ {
631630
olNumber := rowIdx + 1
632631

633-
var amount float64
632+
var amount apd.Decimal
634633
var deliveryDSet bool
635634
if oID < 2101 {
636-
amount = 0
635+
amount = makeDecimal(0, 2)
637636
deliveryDSet = true
638637
} else {
639-
amount = float64(randIntOld(l.rngOld.Rand, 1, 999999)) / 100.0
638+
amount = randDecimal(l.rng.Rand, 0.01, 9999.99, 2)
640639
}
641640

642641
olOIDCol[rowIdx] = int64(oID)
643642
olDIDCol[rowIdx] = int64(dID)
644643
olWIDCol[rowIdx] = int64(wID)
645644
olNumberCol[rowIdx] = int64(olNumber)
646-
olIIDCol[rowIdx] = randIntOld(l.rngOld.Rand, 1, 100000)
645+
olIIDCol[rowIdx] = randInt(l.rng.Rand, 1, 100000)
647646
olSupplyWIDCol[rowIdx] = int64(wID)
648647
if deliveryDSet {
649648
olDeliveryDCol.Set(rowIdx, w.nowTime)
@@ -652,7 +651,7 @@ func (w *tpcc) tpccOrderLineInitialRowBatch(
652651
}
653652
olQuantityCol[rowIdx] = 5
654653
olAmountCol[rowIdx] = amount
655-
olDistInfoCol.Set(rowIdx, randAStringInitialDataOnlyOld(&l.rngOld, &ao, a, 24, 24))
654+
olDistInfoCol.Set(rowIdx, randAStringInitialDataOnly(&l.rng, &ao, a, 24, 24))
656655
}
657656
}
658657

0 commit comments

Comments
 (0)