Skip to content

Commit f2929df

Browse files
authored
Fix #296 - Added support for local disk encryption (#313)
Co-authored-by: Serge Smertin <[email protected]>
1 parent aafa7d5 commit f2929df

File tree

2 files changed

+44
-41
lines changed

2 files changed

+44
-41
lines changed

compute/model.go

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,11 @@ type Cluster struct {
222222
ClusterID string `json:"cluster_id,omitempty"`
223223
ClusterName string `json:"cluster_name,omitempty"`
224224

225-
SparkVersion string `json:"spark_version"` // TODO: perhaps make a default
226-
NumWorkers int32 `json:"num_workers,omitempty" tf:"group:size"`
227-
Autoscale *AutoScale `json:"autoscale,omitempty" tf:"group:size"`
228-
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty" tf:"computed"`
225+
SparkVersion string `json:"spark_version"` // TODO: perhaps make a default
226+
NumWorkers int32 `json:"num_workers,omitempty" tf:"group:size"`
227+
Autoscale *AutoScale `json:"autoscale,omitempty" tf:"group:size"`
228+
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty" tf:"computed"`
229+
EnableLocalDiskEncryption bool `json:"enable_local_disk_encryption,omitempty"`
229230

230231
NodeTypeID string `json:"node_type_id,omitempty" tf:"group:node_type,computed"`
231232
DriverNodeTypeID string `json:"driver_node_type_id,omitempty" tf:"conflicts:instance_pool_id,computed"`
@@ -254,43 +255,44 @@ type ClusterList struct {
254255

255256
// ClusterInfo contains the information when getting cluster info from the get request.
256257
type ClusterInfo struct {
257-
NumWorkers int32 `json:"num_workers,omitempty"`
258-
AutoScale *AutoScale `json:"autoscale,omitempty"`
259-
ClusterID string `json:"cluster_id,omitempty"`
260-
CreatorUserName string `json:"creator_user_name,omitempty"`
261-
Driver *SparkNode `json:"driver,omitempty"`
262-
Executors []SparkNode `json:"executors,omitempty"`
263-
SparkContextID int64 `json:"spark_context_id,omitempty"`
264-
JdbcPort int32 `json:"jdbc_port,omitempty"`
265-
ClusterName string `json:"cluster_name,omitempty"`
266-
SparkVersion string `json:"spark_version"`
267-
SparkConf map[string]string `json:"spark_conf,omitempty"`
268-
AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty"`
269-
NodeTypeID string `json:"node_type_id,omitempty"`
270-
DriverNodeTypeID string `json:"driver_node_type_id,omitempty"`
271-
SSHPublicKeys []string `json:"ssh_public_keys,omitempty"`
272-
CustomTags map[string]string `json:"custom_tags,omitempty"`
273-
ClusterLogConf *StorageInfo `json:"cluster_log_conf,omitempty"`
274-
InitScripts []StorageInfo `json:"init_scripts,omitempty"`
275-
SparkEnvVars map[string]string `json:"spark_env_vars,omitempty"`
276-
AutoterminationMinutes int32 `json:"autotermination_minutes,omitempty"`
277-
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"`
278-
InstancePoolID string `json:"instance_pool_id,omitempty"`
279-
PolicyID string `json:"policy_id,omitempty"`
280-
SingleUserName string `json:"single_user_name,omitempty"`
281-
ClusterSource AwsAvailability `json:"cluster_source,omitempty"`
282-
DockerImage *DockerImage `json:"docker_image,omitempty"`
283-
State ClusterState `json:"state"`
284-
StateMessage string `json:"state_message,omitempty"`
285-
StartTime int64 `json:"start_time,omitempty"`
286-
TerminateTime int64 `json:"terminate_time,omitempty"`
287-
LastStateLossTime int64 `json:"last_state_loss_time,omitempty"`
288-
LastActivityTime int64 `json:"last_activity_time,omitempty"`
289-
ClusterMemoryMb int64 `json:"cluster_memory_mb,omitempty"`
290-
ClusterCores float32 `json:"cluster_cores,omitempty"`
291-
DefaultTags map[string]string `json:"default_tags"`
292-
ClusterLogStatus *LogSyncStatus `json:"cluster_log_status,omitempty"`
293-
TerminationReason *TerminationReason `json:"termination_reason,omitempty"`
258+
NumWorkers int32 `json:"num_workers,omitempty"`
259+
AutoScale *AutoScale `json:"autoscale,omitempty"`
260+
ClusterID string `json:"cluster_id,omitempty"`
261+
CreatorUserName string `json:"creator_user_name,omitempty"`
262+
Driver *SparkNode `json:"driver,omitempty"`
263+
Executors []SparkNode `json:"executors,omitempty"`
264+
SparkContextID int64 `json:"spark_context_id,omitempty"`
265+
JdbcPort int32 `json:"jdbc_port,omitempty"`
266+
ClusterName string `json:"cluster_name,omitempty"`
267+
SparkVersion string `json:"spark_version"`
268+
SparkConf map[string]string `json:"spark_conf,omitempty"`
269+
AwsAttributes *AwsAttributes `json:"aws_attributes,omitempty"`
270+
NodeTypeID string `json:"node_type_id,omitempty"`
271+
DriverNodeTypeID string `json:"driver_node_type_id,omitempty"`
272+
SSHPublicKeys []string `json:"ssh_public_keys,omitempty"`
273+
CustomTags map[string]string `json:"custom_tags,omitempty"`
274+
ClusterLogConf *StorageInfo `json:"cluster_log_conf,omitempty"`
275+
InitScripts []StorageInfo `json:"init_scripts,omitempty"`
276+
SparkEnvVars map[string]string `json:"spark_env_vars,omitempty"`
277+
AutoterminationMinutes int32 `json:"autotermination_minutes,omitempty"`
278+
EnableElasticDisk bool `json:"enable_elastic_disk,omitempty"`
279+
EnableLocalDiskEncryption bool `json:"enable_local_disk_encryption,omitempty"`
280+
InstancePoolID string `json:"instance_pool_id,omitempty"`
281+
PolicyID string `json:"policy_id,omitempty"`
282+
SingleUserName string `json:"single_user_name,omitempty"`
283+
ClusterSource AwsAvailability `json:"cluster_source,omitempty"`
284+
DockerImage *DockerImage `json:"docker_image,omitempty"`
285+
State ClusterState `json:"state"`
286+
StateMessage string `json:"state_message,omitempty"`
287+
StartTime int64 `json:"start_time,omitempty"`
288+
TerminateTime int64 `json:"terminate_time,omitempty"`
289+
LastStateLossTime int64 `json:"last_state_loss_time,omitempty"`
290+
LastActivityTime int64 `json:"last_activity_time,omitempty"`
291+
ClusterMemoryMb int64 `json:"cluster_memory_mb,omitempty"`
292+
ClusterCores float32 `json:"cluster_cores,omitempty"`
293+
DefaultTags map[string]string `json:"default_tags"`
294+
ClusterLogStatus *LogSyncStatus `json:"cluster_log_status,omitempty"`
295+
TerminationReason *TerminationReason `json:"termination_reason,omitempty"`
294296
}
295297

296298
// IsRunningOrResizing returns true if cluster is running or resizing

docs/resources/cluster.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ resource "databricks_cluster" "shared_autoscaling" {
2626
* `autotermination_minutes` - (Optional) Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly
2727
disable automatic termination. _It is highly recommended to have this setting present for Interactive/BI clusters._
2828
* `enable_elastic_disk` - (Optional) If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have `autotermination_minutes` and `autoscale` attributes set. More documentation available at [cluster configuration page](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage-1).
29+
* `enable_local_disk_encryption` - (Optional) Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or ephemeral data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data that is stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and is used to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. _Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access._
2930
* `single_user_name` - (Optional) The optional user name of the user to assign to an interactive cluster. This is required when using standard AAD Passthrough for Azure Datalake Storage (ADLS) with a single-user cluster (i.e. not high-concurrency clusters).
3031
* `idempotency_token` - (Optional) An optional token that can be used to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the ID of the existing cluster instead. The existence of a cluster with the same token is not checked against terminated clusters. If you specify the idempotency token, upon failure you can retry until the request succeeds. Databricks will guarantee that exactly one cluster will be launched with that idempotency token. This token should have at most 64 characters.
3132
* `ssh_public_keys` - (Optional) SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name ubuntu on port 2200. Up to 10 keys can be specified.

0 commit comments

Comments
 (0)