Skip to content

Commit 1fd3eb8

Browse files
craig[bot]rafissMiral Gadanipav-kv
committed
108121: sql: rewind txn sequence number in internal executor r=rafiss a=rafiss If the internal executor is used by a user-initiated query, then it shares the same transaction as the user's query. In that case, it's important to step back the sequence number so that a single statement does not read its own writes. fixes cockroachdb#86162 Release note: None 108201: roachtest: suppress grafana link in issue help for non GCE r=smg260 a=smg260 We don't want to show a grafana when the issues originates from non GCE tests/clusters, since we don't scrape from those yet. Epic: none Release note: None 108427: roachtest: provision 250 MB/s for 8TB restore test r=msbutler a=pavelkalinnikov The `restore/tpce/8TB/aws/nodes=10/cpus=8` test maxes out the default 125 MB/s EBS throughput. This commit provisions throughput to be 250 MB/s so that the test doesn't work at the edge of overload. See cockroachdb#107609 (comment) for the before/after comparison. Touches cockroachdb#106496 Fixes cockroachdb#107609 Epic: none Release note: none Co-authored-by: Rafi Shamim <[email protected]> Co-authored-by: Miral Gadani <[email protected]> Co-authored-by: Pavel Kalinnikov <[email protected]>
4 parents 00f1d4e + 0b6987f + b0535f1 + e6dbabb commit 1fd3eb8

File tree

8 files changed

+68
-9
lines changed

8 files changed

+68
-9
lines changed

pkg/cmd/roachtest/github.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818

1919
"github.com/cockroachdb/cockroach/pkg/cmd/internal/issues"
2020
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
21+
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
2122
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
2223
"github.com/cockroachdb/cockroach/pkg/internal/team"
2324
rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors"
@@ -49,7 +50,7 @@ func roachtestPrefix(p string) string {
4950

5051
// generateHelpCommand creates a HelpCommand for createPostRequest
5152
func generateHelpCommand(
52-
clusterName string, start time.Time, end time.Time,
53+
clusterName string, cloud string, start time.Time, end time.Time,
5354
) func(renderer *issues.Renderer) {
5455
return func(renderer *issues.Renderer) {
5556
issues.HelpCommandAsLink(
@@ -60,8 +61,9 @@ func generateHelpCommand(
6061
"How To Investigate (internal)",
6162
"https://cockroachlabs.atlassian.net/l/c/SSSBr8c7",
6263
)(renderer)
63-
// An empty clusterName corresponds to a cluster creation failure
64-
if clusterName != "" {
64+
// An empty clusterName corresponds to a cluster creation failure.
65+
// We only scrape metrics from GCE clusters for now.
66+
if spec.GCE == cloud && clusterName != "" {
6567
issues.HelpCommandAsLink(
6668
"Grafana",
6769
fmt.Sprintf("https://go.crdb.dev/p/roachfana/%s/%d/%d", clusterName, start.UnixMilli(), end.UnixMilli()),
@@ -223,7 +225,7 @@ func (g *githubIssues) createPostRequest(
223225
Artifacts: artifacts,
224226
ExtraLabels: labels,
225227
ExtraParams: clusterParams,
226-
HelpCommand: generateHelpCommand(issueClusterName, start, end),
228+
HelpCommand: generateHelpCommand(issueClusterName, spec.Cluster.Cloud, start, end),
227229
}, nil
228230
}
229231

pkg/cmd/roachtest/github_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,14 @@ func TestGenerateHelpCommand(t *testing.T) {
111111
end := time.Date(2023, time.July, 21, 16, 42, 13, 137, time.UTC)
112112

113113
r := &issues.Renderer{}
114-
generateHelpCommand("foo-cluster", start, end)(r)
114+
generateHelpCommand("foo-cluster", spec.GCE, start, end)(r)
115115

116116
echotest.Require(t, r.String(), filepath.Join("testdata", "help_command.txt"))
117+
118+
r = &issues.Renderer{}
119+
generateHelpCommand("foo-cluster", spec.AWS, start, end)(r)
120+
121+
echotest.Require(t, r.String(), filepath.Join("testdata", "help_command_non_gce.txt"))
117122
}
118123

119124
func TestCreatePostRequest(t *testing.T) {

pkg/cmd/roachtest/spec/cluster_spec.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ type ClusterSpec struct {
9393
// generality. Make it easier to just inject cloud-specific arguments.
9494
GCEMinCPUPlatform string
9595
GCEVolumeType string
96+
// AWS-specific arguments.
97+
//
98+
// AWSVolumeThroughput is the min provisioned EBS volume throughput.
99+
AWSVolumeThroughput int
96100
}
97101

98102
// MakeClusterSpec makes a ClusterSpec.
@@ -140,11 +144,16 @@ func awsMachineSupportsSSD(machineType string) bool {
140144
return false
141145
}
142146

143-
func getAWSOpts(machineType string, zones []string, volumeSize int, localSSD bool) vm.ProviderOpts {
147+
func getAWSOpts(
148+
machineType string, zones []string, volumeSize, ebsThroughput int, localSSD bool,
149+
) vm.ProviderOpts {
144150
opts := aws.DefaultProviderOpts()
145151
if volumeSize != 0 {
146152
opts.DefaultEBSVolume.Disk.VolumeSize = volumeSize
147153
}
154+
if ebsThroughput != 0 {
155+
opts.DefaultEBSVolume.Disk.Throughput = ebsThroughput
156+
}
148157
if localSSD {
149158
opts.SSDMachineType = machineType
150159
} else {
@@ -310,7 +319,8 @@ func (s *ClusterSpec) RoachprodOpts(
310319
var providerOpts vm.ProviderOpts
311320
switch s.Cloud {
312321
case AWS:
313-
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, createVMOpts.SSDOpts.UseLocalSSD)
322+
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, s.AWSVolumeThroughput,
323+
createVMOpts.SSDOpts.UseLocalSSD)
314324
case GCE:
315325
providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount,
316326
createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
echo
2+
----
3+
----
4+
5+
6+
See: [roachtest README](https://github.com/cockroachdb/cockroach/blob/master/pkg/cmd/roachtest/README.md)
7+
8+
9+
10+
See: [How To Investigate \(internal\)](https://cockroachlabs.atlassian.net/l/c/SSSBr8c7)
11+
12+
----
13+
----

pkg/cmd/roachtest/tests/restore.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,10 @@ func registerRestore(r registry.Registry) {
323323
},
324324
{
325325
// The nightly 8TB Restore test.
326-
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000}),
326+
// NB: bump disk throughput because this load saturates the default 125
327+
// MB/s. See https://github.com/cockroachdb/cockroach/issues/107609.
328+
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000,
329+
ebsThroughput: 250 /* MB/s */}),
327330
backup: makeRestoringBackupSpecs(backupSpecs{
328331
version: "v22.2.1",
329332
workload: tpceRestore{customers: 500000}}),
@@ -466,6 +469,9 @@ type hardwareSpecs struct {
466469
// volumeSize indicates the size of per node block storage (pd-ssd for gcs,
467470
// ebs for aws). If zero, local ssd's are used.
468471
volumeSize int
472+
// ebsThroughput is the min provisioned throughput of the EBS volume, in MB/s.
473+
// TODO(pavelkalinnikov): support provisioning throughput not only on EBS.
474+
ebsThroughput int
469475

470476
// mem is the memory per cpu.
471477
mem spec.MemPerCPU
@@ -494,6 +500,10 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string
494500
}
495501
s := r.MakeClusterSpec(hw.nodes+addWorkloadNode, clusterOpts...)
496502

503+
if hw.ebsThroughput != 0 {
504+
s.AWSVolumeThroughput = hw.ebsThroughput
505+
}
506+
497507
if backupCloud == spec.AWS && s.Cloud == spec.AWS && s.VolumeSize != 0 {
498508
// Work around an issue that RAID0s local NVMe and GP3 storage together:
499509
// https://github.com/cockroachdb/cockroach/issues/98783.
@@ -554,6 +564,9 @@ func makeHardwareSpecs(override hardwareSpecs) hardwareSpecs {
554564
if override.volumeSize != 0 {
555565
specs.volumeSize = override.volumeSize
556566
}
567+
if override.ebsThroughput != 0 {
568+
specs.ebsThroughput = override.ebsThroughput
569+
}
557570
specs.zones = override.zones
558571
specs.workloadNode = override.workloadNode
559572
return specs

pkg/roachprod/vm/gce/gcloud.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,9 @@ func (p *Provider) Create(
10091009
fmt.Sprintf("size=%dGB", providerOpts.PDVolumeSize),
10101010
"auto-delete=yes",
10111011
}
1012+
// TODO(pavelkalinnikov): support disk types with "provisioned-throughput"
1013+
// option, such as Hyperdisk Throughput:
1014+
// https://cloud.google.com/compute/docs/disks/add-hyperdisk#hyperdisk-throughput.
10121015
args = append(args, "--create-disk", strings.Join(pdProps, ","))
10131016
// Enable DISCARD commands for persistent disks, as is advised in:
10141017
// https://cloud.google.com/compute/docs/disks/optimizing-pd-performance#formatting_parameters.

pkg/sql/conn_executor_exec.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,18 @@ func (ex *connExecutor) execStmtInOpenState(
961961
// single/common function. That would be where the stepping mode
962962
// gets enabled once for all SQL statements executed "underneath".
963963
prevSteppingMode := ex.state.mu.txn.ConfigureStepping(ctx, kv.SteppingEnabled)
964-
defer func() { _ = ex.state.mu.txn.ConfigureStepping(ctx, prevSteppingMode) }()
964+
prevSeqNum := ex.state.mu.txn.GetReadSeqNum()
965+
defer func() {
966+
_ = ex.state.mu.txn.ConfigureStepping(ctx, prevSteppingMode)
967+
968+
// If this is an internal executor that is running on behalf of an outer
969+
// txn, then we need to step back the txn so that the outer executor uses
970+
// the proper sequence number.
971+
if ex.executorType == executorTypeInternal && ex.extraTxnState.fromOuterTxn {
972+
err := ex.state.mu.txn.SetReadSeqNum(prevSeqNum)
973+
retEv, retPayload, retErr = makeErrEvent(err)
974+
}
975+
}()
965976

966977
// Then we create a sequencing point.
967978
//

pkg/sql/conn_executor_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,8 @@ SELECT IF(x::INT::FLOAT = x,
450450
'NOOPE', 'insert saw its own writes: ' || x::STRING || ' (it is halloween today)')::FLOAT)
451451
+ 0.1
452452
FROM t.test
453+
-- the function used here is implemented by using the internal executor.
454+
WHERE has_table_privilege('root', ((x+.1)/(x+1) + 1)::int::oid, 'INSERT') IS NULL
453455
`); err != nil {
454456
t.Fatal(err)
455457
}

0 commit comments

Comments
 (0)