Skip to content

Commit 6d9bfa3

Browse files
authored
Pass public key name in every client ssh connection (#3775)
## Changes Before we were passing a client key name (a secret name that contains the actual client public key) to the ssh server job. After it's started, the ssh server was only accepting one client with one specific key. This is a problem when you try to ssh to the same cluster under the same databricks user from multiple different client machines (that have different ssh keys). Our integration tests can't work consistently because of this, and we've already got one customer report about this problem. The main change here is that now we don't always generate key-pairs on the client, but check if the pair already exists in the secret scope and use it. And if not, we generate the pair and save both keys to the scope (and before we were only saving public key there)
1 parent c2d0e9d commit 6d9bfa3

File tree

9 files changed

+73
-120
lines changed

9 files changed

+73
-120
lines changed

experimental/ssh/cmd/connect.go

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,19 @@ the SSH server and handling the connection proxy.
6161
ctx := cmd.Context()
6262
wsClient := cmdctx.WorkspaceClient(ctx)
6363
opts := client.ClientOptions{
64-
ClusterID: clusterID,
65-
ProxyMode: proxyMode,
66-
ServerMetadata: serverMetadata,
67-
ShutdownDelay: shutdownDelay,
68-
MaxClients: maxClients,
69-
HandoverTimeout: handoverTimeout,
70-
ReleasesDir: releasesDir,
71-
AdditionalArgs: args,
72-
ClientPublicKeyName: defaultClientPublicKeyName,
73-
ServerTimeout: serverTimeout,
74-
AutoStartCluster: autoStartCluster,
75-
Profile: wsClient.Config.Profile,
64+
Profile: wsClient.Config.Profile,
65+
ClusterID: clusterID,
66+
ProxyMode: proxyMode,
67+
ServerMetadata: serverMetadata,
68+
ShutdownDelay: shutdownDelay,
69+
MaxClients: maxClients,
70+
HandoverTimeout: handoverTimeout,
71+
ReleasesDir: releasesDir,
72+
ServerTimeout: serverTimeout,
73+
AutoStartCluster: autoStartCluster,
74+
ClientPublicKeyName: clientPublicKeyName,
75+
ClientPrivateKeyName: clientPrivateKeyName,
76+
AdditionalArgs: args,
7677
}
7778
return client.Run(ctx, wsClient, opts)
7879
}

experimental/ssh/cmd/constants.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ const (
88
defaultShutdownDelay = 10 * time.Minute
99
defaultHandoverTimeout = 30 * time.Minute
1010

11-
serverTimeout = 24 * time.Hour
12-
serverPortRange = 100
13-
serverConfigDir = ".ssh-tunnel"
14-
serverPrivateKeyName = "server-private-key"
15-
serverPublicKeyName = "server-public-key"
16-
defaultClientPublicKeyName = "client-public-key"
11+
serverTimeout = 24 * time.Hour
12+
serverPortRange = 100
13+
serverConfigDir = ".ssh-tunnel"
14+
serverPrivateKeyName = "server-private-key"
15+
serverPublicKeyName = "server-public-key"
16+
clientPrivateKeyName = "client-private-key"
17+
clientPublicKeyName = "client-public-key"
1718
)

experimental/ssh/cmd/server.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,14 @@ and proxies them to local SSH daemon processes.
2727
var shutdownDelay time.Duration
2828
var clusterID string
2929
var version string
30-
var keysSecretScopeName string
31-
var authorizedKeyName string
30+
var secretScopeName string
31+
var authorizedKeySecretName string
3232

3333
cmd.Flags().StringVar(&clusterID, "cluster", "", "Databricks cluster ID")
3434
cmd.MarkFlagRequired("cluster")
35-
cmd.Flags().StringVar(&keysSecretScopeName, "keys-secret-scope-name", "", "Databricks secret scope name to store SSH keys")
36-
cmd.MarkFlagRequired("keys-secret-scope-name")
37-
cmd.Flags().StringVar(&authorizedKeyName, "authorized-key-secret-name", "", "Authorized key secret name in the secret scope")
35+
cmd.Flags().StringVar(&secretScopeName, "secret-scope-name", "", "Databricks secret scope name to store SSH keys")
36+
cmd.MarkFlagRequired("secret-scope-name")
37+
cmd.Flags().StringVar(&authorizedKeySecretName, "authorized-key-secret-name", "", "Name of the secret containing the client public key")
3838
cmd.MarkFlagRequired("authorized-key-secret-name")
3939

4040
cmd.Flags().IntVar(&maxClients, "max-clients", defaultMaxClients, "Maximum number of SSH clients")
@@ -55,17 +55,17 @@ and proxies them to local SSH daemon processes.
5555
ctx := cmd.Context()
5656
wsc := cmdctx.WorkspaceClient(ctx)
5757
opts := server.ServerOptions{
58-
ClusterID: clusterID,
59-
MaxClients: maxClients,
60-
ShutdownDelay: shutdownDelay,
61-
Version: version,
62-
ConfigDir: serverConfigDir,
63-
KeysSecretScopeName: keysSecretScopeName,
64-
AuthorizedKeyName: authorizedKeyName,
65-
ServerPrivateKeyName: serverPrivateKeyName,
66-
ServerPublicKeyName: serverPublicKeyName,
67-
DefaultPort: defaultServerPort,
68-
PortRange: serverPortRange,
58+
ClusterID: clusterID,
59+
MaxClients: maxClients,
60+
ShutdownDelay: shutdownDelay,
61+
Version: version,
62+
ConfigDir: serverConfigDir,
63+
SecretScopeName: secretScopeName,
64+
ServerPrivateKeyName: serverPrivateKeyName,
65+
ServerPublicKeyName: serverPublicKeyName,
66+
AuthorizedKeySecretName: authorizedKeySecretName,
67+
DefaultPort: defaultServerPort,
68+
PortRange: serverPortRange,
6969
}
7070
return server.Run(ctx, wsc, opts)
7171
}

experimental/ssh/internal/client/client.go

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ type ClientOptions struct {
5858
ReleasesDir string
5959
// Directory for local SSH keys. Defaults to ~/.databricks/ssh-tunnel-keys
6060
SSHKeysDir string
61-
// Name of the client public key file to be used in the ssh-tunnel secrets scope.
61+
// Client public key name located in the ssh-tunnel secrets scope.
6262
ClientPublicKeyName string
63+
// Client private key name located in the ssh-tunnel secrets scope.
64+
ClientPrivateKeyName string
6365
// If true, the CLI will attempt to start the cluster if it is not running.
6466
AutoStartCluster bool
6567
// Optional auth profile name. If present, will be added as --profile flag to the ProxyCommand while spawning ssh client.
@@ -85,21 +87,27 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
8587
return err
8688
}
8789

88-
keyPath, err := keys.GetLocalSSHKeyPath(opts.ClusterID, opts.SSHKeysDir)
90+
secretScopeName, err := keys.CreateKeysSecretScope(ctx, client, opts.ClusterID)
8991
if err != nil {
90-
return fmt.Errorf("failed to get local keys folder: %w", err)
92+
return fmt.Errorf("failed to create secret scope: %w", err)
93+
}
94+
95+
privateKeyBytes, publicKeyBytes, err := keys.CheckAndGenerateSSHKeyPairFromSecrets(ctx, client, opts.ClusterID, secretScopeName, opts.ClientPrivateKeyName, opts.ClientPublicKeyName)
96+
if err != nil {
97+
return fmt.Errorf("failed to get or generate SSH key pair from secrets: %w", err)
9198
}
92-
privateKeyPath, publicKey, err := keys.CheckAndGenerateSSHKeyPair(ctx, keyPath)
99+
100+
keyPath, err := keys.GetLocalSSHKeyPath(opts.ClusterID, opts.SSHKeysDir)
93101
if err != nil {
94-
return fmt.Errorf("failed to check or generate SSH key pair: %w", err)
102+
return fmt.Errorf("failed to get local keys folder: %w", err)
95103
}
96-
cmdio.LogString(ctx, "Using SSH key: "+privateKeyPath)
97104

98-
keysSecretScopeName, err := keys.PutSecretInScope(ctx, client, opts.ClusterID, opts.ClientPublicKeyName, publicKey)
105+
err = keys.SaveSSHKeyPair(keyPath, privateKeyBytes, publicKeyBytes)
99106
if err != nil {
100-
return fmt.Errorf("failed to store public key in secret scope: %w", err)
107+
return fmt.Errorf("failed to save SSH key pair locally: %w", err)
101108
}
102-
cmdio.LogString(ctx, fmt.Sprintf("Secrets scope: %s, key name: %s", keysSecretScopeName, opts.ClientPublicKeyName))
109+
cmdio.LogString(ctx, "Using SSH key: "+keyPath)
110+
cmdio.LogString(ctx, fmt.Sprintf("Secrets scope: %s, key name: %s", secretScopeName, opts.ClientPublicKeyName))
103111

104112
var userName string
105113
var serverPort int
@@ -111,7 +119,7 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
111119
if err := UploadTunnelReleases(ctx, client, version, opts.ReleasesDir); err != nil {
112120
return fmt.Errorf("failed to upload ssh-tunnel binaries: %w", err)
113121
}
114-
userName, serverPort, err = ensureSSHServerIsRunning(ctx, client, version, keysSecretScopeName, opts)
122+
userName, serverPort, err = ensureSSHServerIsRunning(ctx, client, version, secretScopeName, opts)
115123
if err != nil {
116124
return fmt.Errorf("failed to ensure that ssh server is running: %w", err)
117125
}
@@ -137,7 +145,7 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
137145
return runSSHProxy(ctx, client, serverPort, opts)
138146
} else {
139147
cmdio.LogString(ctx, fmt.Sprintf("Additional SSH arguments: %v", opts.AdditionalArgs))
140-
return spawnSSHClient(ctx, userName, privateKeyPath, serverPort, opts)
148+
return spawnSSHClient(ctx, userName, keyPath, serverPort, opts)
141149
}
142150
}
143151

@@ -175,7 +183,7 @@ func getServerMetadata(ctx context.Context, client *databricks.WorkspaceClient,
175183
return serverPort, string(bodyBytes), nil
176184
}
177185

178-
func submitSSHTunnelJob(ctx context.Context, client *databricks.WorkspaceClient, version, keysSecretScopeName string, opts ClientOptions) (int64, error) {
186+
func submitSSHTunnelJob(ctx context.Context, client *databricks.WorkspaceClient, version, secretScopeName string, opts ClientOptions) (int64, error) {
179187
contentDir, err := sshWorkspace.GetWorkspaceContentDir(ctx, client, version, opts.ClusterID)
180188
if err != nil {
181189
return 0, fmt.Errorf("failed to get workspace content directory: %w", err)
@@ -212,7 +220,7 @@ func submitSSHTunnelJob(ctx context.Context, client *databricks.WorkspaceClient,
212220
NotebookPath: jobNotebookPath,
213221
BaseParameters: map[string]string{
214222
"version": version,
215-
"keysSecretScopeName": keysSecretScopeName,
223+
"secretScopeName": secretScopeName,
216224
"authorizedKeySecretName": opts.ClientPublicKeyName,
217225
"shutdownDelay": opts.ShutdownDelay.String(),
218226
"maxClients": strconv.Itoa(opts.MaxClients),
@@ -290,12 +298,12 @@ func checkClusterState(ctx context.Context, client *databricks.WorkspaceClient,
290298
return nil
291299
}
292300

293-
func ensureSSHServerIsRunning(ctx context.Context, client *databricks.WorkspaceClient, version, keysSecretScopeName string, opts ClientOptions) (string, int, error) {
301+
func ensureSSHServerIsRunning(ctx context.Context, client *databricks.WorkspaceClient, version, secretScopeName string, opts ClientOptions) (string, int, error) {
294302
serverPort, userName, err := getServerMetadata(ctx, client, opts.ClusterID, version)
295303
if errors.Is(err, errServerMetadata) {
296304
cmdio.LogString(ctx, "SSH server is not running, starting it now...")
297305

298-
runID, err := submitSSHTunnelJob(ctx, client, version, keysSecretScopeName, opts)
306+
runID, err := submitSSHTunnelJob(ctx, client, version, secretScopeName, opts)
299307
if err != nil {
300308
return "", 0, fmt.Errorf("failed to submit ssh server job: %w", err)
301309
}

experimental/ssh/internal/client/ssh-server-bootstrap.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
SSH_TUNNEL_BASENAME = "databricks_cli"
1414

1515
dbutils.widgets.text("version", "")
16-
dbutils.widgets.text("keysSecretScopeName", "")
16+
dbutils.widgets.text("secretScopeName", "")
1717
dbutils.widgets.text("authorizedKeySecretName", "")
1818
dbutils.widgets.text("maxClients", "10")
1919
dbutils.widgets.text("shutdownDelay", "10m")
@@ -95,7 +95,7 @@ def run_ssh_server():
9595
if os.environ.get("VIRTUAL_ENV") is None:
9696
os.environ["VIRTUAL_ENV"] = sys.executable
9797

98-
secrets_scope = dbutils.widgets.get("keysSecretScopeName")
98+
secrets_scope = dbutils.widgets.get("secretScopeName")
9999
if not secrets_scope:
100100
raise RuntimeError("Secrets scope is required. Please provide it using the 'keysSecretScopeName' widget.")
101101

@@ -134,7 +134,7 @@ def run_ssh_server():
134134
"ssh",
135135
"server",
136136
f"--cluster={ctx.clusterId}",
137-
f"--keys-secret-scope-name={secrets_scope}",
137+
f"--secret-scope-name={secrets_scope}",
138138
f"--authorized-key-secret-name={public_key_secret_name}",
139139
f"--max-clients={max_clients}",
140140
f"--shutdown-delay={shutdown_delay}",

experimental/ssh/internal/keys/keys.go

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ import (
99
"fmt"
1010
"os"
1111
"path/filepath"
12-
"runtime"
13-
"strings"
1412

1513
"github.com/databricks/databricks-sdk-go"
1614
"golang.org/x/crypto/ssh"
@@ -70,61 +68,6 @@ func SaveSSHKeyPair(keyPath string, privateKeyBytes, publicKeyBytes []byte) erro
7068
return nil
7169
}
7270

73-
func checkSSHKeyPairPermissions(keyPath string) error {
74-
dir := filepath.Dir(keyPath)
75-
dirInfo, err := os.Stat(dir)
76-
if err != nil {
77-
if os.IsNotExist(err) {
78-
return fmt.Errorf("key directory does not exist: %s", dir)
79-
} else {
80-
return fmt.Errorf("failed to stat key directory: %w", err)
81-
}
82-
}
83-
if runtime.GOOS != "windows" && dirInfo.Mode().Perm() != 0o700 {
84-
return fmt.Errorf("key directory permissions are not set to 0700, current permissions: %o", dirInfo.Mode().Perm())
85-
}
86-
87-
info, err := os.Stat(keyPath)
88-
if err != nil {
89-
return fmt.Errorf("failed to stat key file: %w", err)
90-
}
91-
92-
if runtime.GOOS != "windows" && info.Mode().Perm() != 0o600 {
93-
return fmt.Errorf("private key permissions are not set to 0600, current permissions: %o", info.Mode().Perm())
94-
}
95-
96-
pubKeyPath := keyPath + ".pub"
97-
pubInfo, err := os.Stat(pubKeyPath)
98-
if err != nil {
99-
return fmt.Errorf("failed to stat public key file: %w", err)
100-
}
101-
102-
if runtime.GOOS != "windows" && pubInfo.Mode().Perm() != 0o644 {
103-
return fmt.Errorf("public key permissions are not set to 0644, current permissions: %o", pubInfo.Mode().Perm())
104-
}
105-
106-
return nil
107-
}
108-
109-
func CheckAndGenerateSSHKeyPair(ctx context.Context, keyPath string) (string, string, error) {
110-
if err := checkSSHKeyPairPermissions(keyPath); err != nil {
111-
privateKeyBytes, publicKeyBytes, err := generateSSHKeyPair()
112-
if err != nil {
113-
return "", "", err
114-
}
115-
if err := SaveSSHKeyPair(keyPath, privateKeyBytes, publicKeyBytes); err != nil {
116-
return "", "", err
117-
}
118-
}
119-
120-
publicKeyBytes, err := os.ReadFile(keyPath + ".pub")
121-
if err != nil {
122-
return "", "", fmt.Errorf("failed to read public key: %w", err)
123-
}
124-
125-
return keyPath, strings.TrimSpace(string(publicKeyBytes)), nil
126-
}
127-
12871
func CheckAndGenerateSSHKeyPairFromSecrets(ctx context.Context, client *databricks.WorkspaceClient, clusterID, secretScopeName, privateKeyName, publicKeyName string) ([]byte, []byte, error) {
12972
privateKeyBytes, err := GetSecret(ctx, client, secretScopeName, privateKeyName)
13073
if err != nil {

experimental/ssh/internal/keys/secrets.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"github.com/databricks/databricks-sdk-go/service/workspace"
1111
)
1212

13-
func createKeysSecretScope(ctx context.Context, client *databricks.WorkspaceClient, clusterID string) (string, error) {
13+
func CreateKeysSecretScope(ctx context.Context, client *databricks.WorkspaceClient, clusterID string) (string, error) {
1414
me, err := client.CurrentUser.Me(ctx)
1515
if err != nil {
1616
return "", fmt.Errorf("failed to get current user: %w", err)
@@ -54,7 +54,7 @@ func putSecret(ctx context.Context, client *databricks.WorkspaceClient, scope, k
5454
}
5555

5656
func PutSecretInScope(ctx context.Context, client *databricks.WorkspaceClient, clusterID, key, value string) (string, error) {
57-
scopeName, err := createKeysSecretScope(ctx, client, clusterID)
57+
scopeName, err := CreateKeysSecretScope(ctx, client, clusterID)
5858
if err != nil {
5959
return "", err
6060
}

experimental/ssh/internal/server/server.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ type ServerOptions struct {
3434
// The directory to store sshd configuration
3535
ConfigDir string
3636
// The name of the secrets scope to use for client and server keys
37-
KeysSecretScopeName string
38-
// The name of a secret containing the client's public key value
39-
AuthorizedKeyName string
37+
SecretScopeName string
4038
// The name of a secret containing the server's private key value
4139
ServerPrivateKeyName string
4240
// The name of a secret containing the server's public key value
4341
ServerPublicKeyName string
42+
// The name of a secret containing the client's public key (authorized key)
43+
AuthorizedKeySecretName string
4444
// The default port to listen on (for /ssh and /metadata requests from the clients)
4545
DefaultPort int
4646
// If the default port is taken, the server will try to listen on the first free port in the DefaultPort + PortRange range
@@ -71,10 +71,10 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ServerOpt
7171
return fmt.Errorf("failed to save Jupyter init script: %w", err)
7272
}
7373

74-
connections := proxy.NewConnectionsManager(opts.MaxClients, opts.ShutdownDelay)
7574
createServerCommand := func(ctx context.Context) *exec.Cmd {
7675
return createSSHDProcess(ctx, sshdConfigPath)
7776
}
77+
connections := proxy.NewConnectionsManager(opts.MaxClients, opts.ShutdownDelay)
7878
http.Handle("/ssh", proxy.NewProxyServer(ctx, connections, createServerCommand))
7979
http.HandleFunc("/metadata", serveMetadata)
8080
go handleTimeout(ctx, connections.TimedOut, opts.ShutdownDelay)

experimental/ssh/internal/server/sshd.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
)
1616

1717
func prepareSSHDConfig(ctx context.Context, client *databricks.WorkspaceClient, opts ServerOptions) (string, error) {
18-
clientPublicKey, err := keys.GetSecret(ctx, client, opts.KeysSecretScopeName, opts.AuthorizedKeyName)
18+
clientPublicKey, err := keys.GetSecret(ctx, client, opts.SecretScopeName, opts.AuthorizedKeySecretName)
1919
if err != nil {
2020
return "", fmt.Errorf("failed to get client public key: %w", err)
2121
}
@@ -36,7 +36,7 @@ func prepareSSHDConfig(ctx context.Context, client *databricks.WorkspaceClient,
3636
return "", fmt.Errorf("failed to create SSH directory: %w", err)
3737
}
3838

39-
privateKeyBytes, publicKeyBytes, err := keys.CheckAndGenerateSSHKeyPairFromSecrets(ctx, client, opts.ClusterID, opts.KeysSecretScopeName, opts.ServerPrivateKeyName, opts.ServerPublicKeyName)
39+
privateKeyBytes, publicKeyBytes, err := keys.CheckAndGenerateSSHKeyPairFromSecrets(ctx, client, opts.ClusterID, opts.SecretScopeName, opts.ServerPrivateKeyName, opts.ServerPublicKeyName)
4040
if err != nil {
4141
return "", fmt.Errorf("failed to get SSH key pair from secrets: %w", err)
4242
}
@@ -47,8 +47,8 @@ func prepareSSHDConfig(ctx context.Context, client *databricks.WorkspaceClient,
4747
}
4848

4949
sshdConfig := filepath.Join(sshDir, "sshd_config")
50-
authKeys := filepath.Join(sshDir, "authorized_keys")
51-
if err := os.WriteFile(authKeys, clientPublicKey, 0o600); err != nil {
50+
authKeysPath := filepath.Join(sshDir, "authorized_keys")
51+
if err := os.WriteFile(authKeysPath, clientPublicKey, 0o600); err != nil {
5252
return "", err
5353
}
5454

@@ -75,7 +75,7 @@ func prepareSSHDConfig(ctx context.Context, client *databricks.WorkspaceClient,
7575
"ChallengeResponseAuthentication no\n" +
7676
"Subsystem sftp internal-sftp\n" +
7777
"HostKey " + keyPath + "\n" +
78-
"AuthorizedKeysFile " + authKeys + "\n" +
78+
"AuthorizedKeysFile " + authKeysPath + "\n" +
7979
setEnv + "\n"
8080

8181
if err := os.WriteFile(sshdConfig, []byte(sshdConfigContent), 0o600); err != nil {

0 commit comments

Comments
 (0)