Skip to content

Commit b329224

Browse files
committed
roachprod: auto restart option for cockroach processes
Previously, if a cockroach process died, it had to be restarted manually. This is the expected behavior in most circumstances, but it is not always practical, for instance, when running a long unattended benchmark. To support such cases, a new start option has been added that automatically restarts the service if the process fails (e.g., due to a disk stall). This ensures the process comes back online quickly without manual intervention.

Epic: None

Release note: None
1 parent 20c3c8e commit b329224

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

pkg/roachprod/install/cockroach.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ type StartOpts struct {
152152
// initialization and sequential node starts and also reuses the previous start script.
153153
IsRestart bool
154154

155+
// AutoRestart enables automatically restarting the process if it dies.
156+
AutoRestart bool
157+
155158
// EnableFluentSink determines whether to enable the fluent-servers attribute
156159
// in the CockroachDB logging configuration.
157160
EnableFluentSink bool
@@ -1004,6 +1007,7 @@ func (c *SyncedCluster) generateStartCmd(
10041007
NumFilesLimit: startOpts.NumFilesLimit,
10051008
VirtualClusterLabel: VirtualClusterLabel(startOpts.VirtualClusterName, startOpts.SQLInstance),
10061009
Local: c.IsLocal(),
1010+
AutoRestart: startOpts.AutoRestart,
10071011
})
10081012
}
10091013

@@ -1017,6 +1021,7 @@ type startTemplateData struct {
10171021
VirtualClusterLabel string
10181022
Args []string
10191023
EnvVars []string
1024+
AutoRestart bool
10201025
}
10211026

10221027
type loggingTemplateData struct {

pkg/roachprod/install/scripts/start.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ KEY_CMD=#{.KeyCmd#}
1616
MEMORY_MAX=#{.MemoryMax#}
1717
NUM_FILES_LIMIT=#{.NumFilesLimit#}
1818
VIRTUAL_CLUSTER_LABEL=#{.VirtualClusterLabel#}
19+
#{if .AutoRestart#}
20+
AUTO_RESTART=1
21+
#{end#}
22+
1923
ARGS=(
2024
#{range .Args -#}
2125
#{shesc .#}
@@ -99,4 +103,5 @@ sudo systemd-run --unit "${VIRTUAL_CLUSTER_LABEL}" \
99103
-p "MemoryMax=${MEMORY_MAX}" \
100104
-p LimitCORE=infinity \
101105
-p "LimitNOFILE=${NUM_FILES_LIMIT}" \
106+
${AUTO_RESTART:+-p Restart=always -p RestartSec=5s} \
102107
bash "${0}" run

0 commit comments

Comments
 (0)