Skip to content

Commit 856a964

Browse files
drtprod: update PUA yaml configs
This commit updates the PUA yaml configs to run via CI in the cockroach-ephemeral project. Epic: none Release note: None
1 parent 4751177 commit 856a964

File tree

5 files changed

+103
-53
lines changed

5 files changed

+103
-53
lines changed

build/teamcity/internal/cockroach/pua/pua_run.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,12 @@
55
# Use of this software is governed by the CockroachDB Software License
66
# included in the /LICENSE file.
77

8-
98
set -exuo pipefail
109

1110
dir="$(dirname $(dirname $(dirname $(dirname $(dirname "${0}")))))"
1211

1312
source "$dir/teamcity-support.sh" # For $root
1413
source "$dir/teamcity-bazel-support.sh" # For run_bazel
1514
#
16-
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e GOOGLE_APPLICATION_CREDENTIALS_CONTENT -e GOOGLE_SERVICE_ACCOUNT" \
15+
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e GOOGLE_APPLICATION_CREDENTIALS_CONTENT -e GOOGLE_SERVICE_ACCOUNT -e PUA_CONFIG -e CRDB_VERSION -e CRDB_UPGRADE_VERSION -e DD_API_KEY -e DD_APP_KEY" \
1716
run_bazel build/teamcity/internal/cockroach/pua/pua_run_impl.sh

build/teamcity/internal/cockroach/pua/pua_run_impl.sh

Lines changed: 87 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,36 +5,60 @@
55
# Use of this software is governed by the CockroachDB Software License
66
# included in the /LICENSE file.
77

8-
98
set -exuo pipefail
109

1110
export ROACHPROD_DISABLED_PROVIDERS=aws,azure,ibm
1211
export ROACHPROD_DISABLE_UPDATE_CHECK=true
13-
export ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT=$GOOGLE_SERVICE_ACCOUNT
14-
export ROACHPROD_GCE_DEFAULT_PROJECT=cockroach-ephemeral
1512

16-
# build the binaries - roachprod, roachtest and drtprod
13+
export ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT=${GOOGLE_SERVICE_ACCOUNT:-teamcity-pua@cockroach-ephemeral.iam.gserviceaccount.com}
14+
export ROACHPROD_GCE_DEFAULT_PROJECT=${GOOGLE_PROJECT:-cockroach-ephemeral}
15+
16+
export ROACHPROD_DNS=${ROACHPROD_DNS:-roachprod.crdb.io}
17+
export ROACHPROD_GCE_DNS_ZONE=${ROACHPROD_GCE_DNS_ZONE:-roachprod}
18+
export ROACHPROD_GCE_DNS_DOMAIN=${ROACHPROD_GCE_DNS_DOMAIN:-roachprod.crdb.io}
19+
20+
# generate the ssh key if it doesn't exist.
21+
if [[ ! -f ~/.ssh/id_rsa.pub ]]; then
22+
ssh-keygen -q -C "teamcity-pua-bazel $(date)" -N "" -f ~/.ssh/id_rsa
23+
fi
24+
25+
# set up google credentials.
26+
if [[ "$GOOGLE_APPLICATION_CREDENTIALS_CONTENT" ]]; then
27+
echo "$GOOGLE_APPLICATION_CREDENTIALS_CONTENT" > creds.json
28+
gcloud auth activate-service-account --key-file=creds.json
29+
30+
# Set GOOGLE_APPLICATION_CREDENTIALS so that gcp go libraries can find it.
31+
export GOOGLE_APPLICATION_CREDENTIALS="$(pwd)/creds.json"
32+
else
33+
echo 'warning: $GOOGLE_APPLICATION_CREDENTIALS_CONTENT not set' >&2
34+
exit 1
35+
fi
36+
37+
# build the binaries: roachprod, roachtest, and drtprod.
1738
build() {
1839
config="crosslinux"
19-
# Prepare the bin/ and lib/ directories.
20-
mkdir -p bin
21-
chmod o+rwx bin
40+
# prepare the bin/ and artifacts/ directories.
41+
mkdir -p bin artifacts
42+
chmod o+rwx bin artifacts
2243

23-
# Array of arguments to be passed to bazel for the component.
44+
# array of arguments to be passed to bazel for the component.
2445
bazel_args=()
2546

26-
# Array of build artifacts. Each item has format "src:dest"; src is relative to
47+
# array of build artifacts. each item has format "src:dest"; src is relative to
2748
# the bazel-bin directory, dst is relative to cwd.
2849
artifacts=()
2950

3051
bazel_args+=(//pkg/cmd/roachtest)
3152
artifacts+=("pkg/cmd/roachtest/roachtest_/roachtest:bin/roachtest")
53+
artifacts+=("pkg/cmd/roachtest/roachtest_/roachtest:artifacts/roachtest")
3254

3355
bazel_args+=(//pkg/cmd/roachprod)
3456
artifacts+=("pkg/cmd/roachprod/roachprod_/roachprod:bin/roachprod")
57+
artifacts+=("pkg/cmd/roachprod/roachprod_/roachprod:artifacts/roachprod")
3558

3659
bazel_args+=(//pkg/cmd/drtprod)
3760
artifacts+=("pkg/cmd/drtprod/drtprod_/drtprod:bin/drtprod")
61+
artifacts+=("pkg/cmd/drtprod/drtprod_/drtprod:artifacts/drtprod")
3862

3963
bazel build --config $config -c opt "${bazel_args[@]}"
4064
BAZEL_BIN=$(bazel info bazel-bin --config $config -c opt)
@@ -46,25 +70,65 @@ build() {
4670
chmod a+w "$dst"
4771
done
4872

49-
# add bin to path
73+
# add bin to path.
5074
export PATH=$PATH:$(pwd)/bin
5175
}
5276

53-
# Set up Google credentials. Note that we need this for all clouds since we upload
54-
# perf artifacts to Google Storage at the end.
55-
if [[ "$GOOGLE_APPLICATION_CREDENTIALS_CONTENT" ]]; then
56-
echo "$GOOGLE_APPLICATION_CREDENTIALS_CONTENT" > creds.json
57-
gcloud auth activate-service-account --key-file=creds.json
77+
# run the build function.
78+
build
5879

59-
# Set GOOGLE_APPLICATION_CREDENTIALS so that gcp go libraries can find it.
60-
export GOOGLE_APPLICATION_CREDENTIALS="$(pwd)/creds.json"
80+
log_file="artifacts/pua.log"
81+
export config=${PUA_CONFIG:-"single_region"}
82+
if [[ "$config" == "single_region" ]]; then
83+
CLUSTER=drt-pua-9
84+
WORKLOAD=workload-pua-9
85+
ZONE_NODE=7-9
86+
config_file="pkg/cmd/drtprod/configs/drt_pua_9.yaml"
87+
elif [[ "$config" == "multi_region" ]]; then
88+
CLUSTER=drt-pua-15
89+
WORKLOAD=workload-pua-15
90+
ZONE_NODE=3-4
91+
config_file="pkg/cmd/drtprod/configs/drt_pua_15.yaml"
6192
else
62-
echo 'warning: GOOGLE_EPHEMERAL_CREDENTIALS not set' >&2
93+
echo "Error: Invalid PUA_CONFIG value: '$config'. Must be 'single_region' or 'multi_region'." >&2
6394
exit 1
6495
fi
6596

66-
# Run the build function
67-
build
68-
#
69-
roachprod list
70-
#drtprod list
97+
# execute the pua benchmark test.
98+
drtprod execute ${config_file} | tee -a "${log_file}"
99+
100+
# the pua dashboard uses a json file to show the benchmark results.
101+
# we will generate the json file from the datadog metrics.
102+
# download metric converter from gcs bucket pua-backup-us-east1.
103+
mkdir -p datadog-metric-converter
104+
gsutil -m cp -r gs://pua-backup-us-east1/datadog-metric-converter/** datadog-metric-converter/
105+
106+
# install pip for python3.8.
107+
curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py -o get-pip.py
108+
python3 get-pip.py
109+
110+
# install the requirements for the metric converter.
111+
python3 -m pip install -r datadog-metric-converter/requirements.txt
112+
113+
# get the start and end time of the benchmark.
114+
epoch_start_time=$(grep "\[Phase-1: Baseline Performance\]" ${log_file} | grep "Starting" | awk -F'[][]' '{print $4}')
115+
epoch_start_time=$((epoch_start_time - 240))
116+
epoch_end_time=$(( $(date +%s) - 120 ))
117+
host=$(hostname)
118+
119+
# generate the benchmark.json file
120+
python3 datadog-metric-converter/convert-datadog-metric.py --start-time=${epoch_start_time} --end-time=${epoch_end_time} \
121+
--cluster-name ${CLUSTER} --workload-name ${WORKLOAD} \
122+
--monitor-host ${host} --zone-node ${ZONE_NODE}
123+
124+
125+
# delete the binaries - roachprod, roachtest and drtprod,
126+
# as we don't need them to be uploaded to TeamCity artifacts
127+
rm -f artifacts/roachprod artifacts/roachtest artifacts/drtprod
128+
cp benchmark.json "artifacts/benchmark.json"
129+
130+
rm -rf datadog-metric-converter
131+
132+
# destroy the clusters.
133+
drtprod destroy ${CLUSTER}
134+
drtprod destroy ${WORKLOAD}

pkg/cmd/drtprod/cli/commands/yamlprocessor.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/cockroachdb/cockroach/pkg/roachprod/config"
2424
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
2525
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
26+
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
2627
"github.com/cockroachdb/errors"
2728
"github.com/spf13/cobra"
2829
"golang.org/x/exp/maps"
@@ -556,7 +557,7 @@ func executeCommands(ctx context.Context, logPrefix string, cmds []*command) err
556557
fmt.Printf("[%s] Waiting for %d seconds\n", logPrefix, cmd.waitBefore)
557558
time.Sleep(time.Duration(cmd.waitBefore) * time.Second)
558559
}
559-
fmt.Printf("[%s] Starting <%v>\n", logPrefix, cmd)
560+
fmt.Printf("[%s] [%d] Starting <%v>\n", logPrefix, timeutil.Now().UTC().Unix(), cmd)
560561
err := commandExecutor(ctx, logPrefix, cmd.name, cmd.args...)
561562
if err != nil {
562563
if !cmd.continueOnFailure {
@@ -566,7 +567,7 @@ func executeCommands(ctx context.Context, logPrefix string, cmds []*command) err
566567
// Log the failure and continue if configured to do so
567568
fmt.Printf("[%s] Failed <%v>, Error Ignored: %v\n", logPrefix, cmd, err)
568569
} else {
569-
fmt.Printf("[%s] Completed <%v>\n", logPrefix, cmd)
570+
fmt.Printf("[%s] [%d] Completed <%v>\n", logPrefix, timeutil.Now().UTC().Unix(), cmd)
570571
if cmd.waitAfter > 0 {
571572
fmt.Printf("[%s] Waiting for %d seconds\n", logPrefix, cmd.waitAfter)
572573
time.Sleep(time.Duration(cmd.waitAfter) * time.Second)

pkg/cmd/drtprod/configs/drt_pua_9.yaml

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,11 @@
88
# Additionally, it configures Datadog and includes scripts for running workload and roachtest operations.
99

1010
environment:
11-
ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
12-
ROACHPROD_DNS: drt.crdb.io
13-
ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
14-
ROACHPROD_GCE_DNS_ZONE: drt
15-
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
1611
CLUSTER: drt-pua-9
1712
CLUSTER_NODES: 9
1813
WORKLOAD_CLUSTER: workload-pua-9
1914
WORKLOAD_NODES: 1
2015
STORE_COUNT: 2
21-
COCKROACH_VERSION: v25.2.0
22-
COCKROACH_UPGRADE_VERSION: v25.2.1
2316

2417
TPCC_WAREHOUSES: 5000
2518
TPCC_ACTIVE_WAREHOUSES: 5000
@@ -29,7 +22,7 @@ environment:
2922
CONNS: 1800
3023

3124
# GCP Cloud Storage bucket for storing backups
32-
BUCKET_US_EAST_1: cockroach-drt-backup-us-east1
25+
BUCKET_US_EAST_1: pua-backup-us-east1
3326

3427
dependent_file_locations:
3528
- artifacts/roachprod
@@ -60,7 +53,7 @@ targets:
6053
local-ssd: true
6154
gce-local-ssd-count: $STORE_COUNT
6255
username: drt
63-
lifetime: 8760h
56+
lifetime: 15h
6457
gce-image: "ubuntu-2204-jammy-v20240319"
6558
on_rollback:
6659
- command: destroy
@@ -73,7 +66,7 @@ targets:
7366
args:
7467
- $CLUSTER
7568
- release
76-
- $COCKROACH_VERSION
69+
- $CRDB_VERSION
7770
- script: "pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller"
7871
- script: "pkg/cmd/drtprod/scripts/setup_datadog_cluster"
7972
- command: start
@@ -139,7 +132,7 @@ targets:
139132
gce-machine-type: n2-standard-8
140133
os-volume-size: 100
141134
username: workload
142-
lifetime: 8760h
135+
lifetime: 15h
143136
on_rollback:
144137
- command: destroy
145138
args:
@@ -221,7 +214,7 @@ targets:
221214
active-warehouses: $TPCC_ACTIVE_WAREHOUSES
222215
duration: $RUN_DURATION
223216
ramp: 5m
224-
wait_after: true
217+
wait: true
225218
max-conn-lifetime: $MAX_CONN_LIFETIME
226219
conns: $CONNS
227220
- script: "pkg/cmd/drtprod/scripts/pua_operations.sh"
@@ -277,7 +270,7 @@ targets:
277270
args:
278271
- $CLUSTER
279272
- release
280-
- $COCKROACH_UPGRADE_VERSION
273+
- $CRDB_UPGRADE_VERSION
281274
flags:
282275
pause: 5m
283276
grace-period: 500

pkg/cmd/drtprod/configs/drt_pua_mr.yaml

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,11 @@
77
# This yaml also creates a workload cluster with 3 nodes in 3 regions, 1 node in each region.
88
# This also configures datadog and scripts for running workload and roachtest operations.
99
environment:
10-
ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: [email protected]
11-
ROACHPROD_DNS: drt.crdb.io
12-
ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
13-
ROACHPROD_GCE_DNS_ZONE: drt
14-
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
1510
CLUSTER: drt-pua-15
1611
WORKLOAD_CLUSTER: workload-pua-15
1712
CLUSTER_NODES: 15
1813
WORKLOAD_NODES: 3
1914
STORE_COUNT: 2
20-
COCKROACH_VERSION: v25.2.0
21-
COCKROACH_UPGRADE_VERSION: v25.2.1
2215

2316
# variables used by tpcc_run_multiregion.sh
2417
NUM_REGIONS: 3
@@ -35,9 +28,9 @@ environment:
3528
MAX_CONN_LIFETIME: 3m
3629

3730
# GCP Cloud Storage bucket for storing locality-aware backups
38-
BUCKET_NORTH_AMERICA: cockroach-drt-backup
39-
BUCKET_US_EAST_5: cockroach-drt-backup-us-east5
40-
BUCKET_US_EAST_1: cockroach-drt-backup-us-east1
31+
BUCKET_US_CENTRAL1: pua-backup-us-central-1
32+
BUCKET_US_EAST_5: pua-backup-us-east5
33+
BUCKET_US_EAST_1: pua-backup-us-east1
4134

4235
dependent_file_locations:
4336
- artifacts/roachprod
@@ -79,7 +72,7 @@ targets:
7972
args:
8073
- $CLUSTER
8174
- release
82-
- $COCKROACH_VERSION
75+
- $CRDB_VERSION
8376
- script: "pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller"
8477
- script: "pkg/cmd/drtprod/scripts/setup_datadog_cluster"
8578
- command: start
@@ -260,7 +253,7 @@ targets:
260253
- --
261254
- -e
262255
- |
263-
BACKUP INTO ('gs://$BUCKET_NORTH_AMERICA/$CLUSTER?AUTH=implicit&COCKROACH_LOCALITY=default',
256+
BACKUP INTO ('gs://$BUCKET_US_CENTRAL1/$CLUSTER?AUTH=implicit&COCKROACH_LOCALITY=default',
264257
'gs://$BUCKET_US_EAST_5/$CLUSTER?AUTH=implicit&COCKROACH_LOCALITY=region%3Dus-east5',
265258
'gs://$BUCKET_US_EAST_1/$CLUSTER?AUTH=implicit&COCKROACH_LOCALITY=region%3Dus-east1')
266259
WITH OPTIONS (revision_history = true, detached)
@@ -283,7 +276,7 @@ targets:
283276
args:
284277
- $CLUSTER
285278
- release
286-
- $COCKROACH_UPGRADE_VERSION
279+
- $CRDB_UPGRADE_VERSION
287280
flags:
288281
pause: 5m
289282
grace-period: 500

0 commit comments

Comments
 (0)