
Commit 60d596b

Add delegation support for databricks_storage_credential (#2973)
* refactor storage credential to support delegation
* update guide
* doc

1 parent ee3bafc · commit 60d596b

File tree

7 files changed: +82 -68 lines changed


catalog/resource_metastore_data_access.go

Lines changed: 2 additions & 8 deletions

@@ -12,10 +12,6 @@ import (
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
 )
 
-type AwsIamRole struct {
-	RoleARN string `json:"role_arn"`
-}
-
 type GcpServiceAccountKey struct {
 	Email        string `json:"email"`
 	PrivateKeyId string `json:"private_key_id"`
@@ -41,6 +37,8 @@ func adjustDataAccessSchema(m map[string]*schema.Schema) map[string]*schema.Schema {
 	// suppress changes for private_key
 	m["gcp_service_account_key"].DiffSuppressFunc = SuppressGcpSAKeyDiff
 
+	common.MustSchemaPath(m, "aws_iam_role", "external_id").Computed = true
+	common.MustSchemaPath(m, "aws_iam_role", "unity_catalog_iam_arn").Computed = true
 	common.MustSchemaPath(m, "azure_managed_identity", "credential_id").Computed = true
 	common.MustSchemaPath(m, "databricks_gcp_service_account", "email").Computed = true
 	common.MustSchemaPath(m, "databricks_gcp_service_account", "credential_id").Computed = true
@@ -55,10 +53,6 @@ func adjustDataAccessSchema(m map[string]*schema.Schema) map[string]*schema.Schema {
 
 var dacSchema = common.StructToSchema(StorageCredentialInfo{},
 	func(m map[string]*schema.Schema) map[string]*schema.Schema {
-		m["metastore_id"] = &schema.Schema{
-			Type:     schema.TypeString,
-			Required: true,
-		}
 		m["is_default"] = &schema.Schema{
 			// having more than one default DAC per metastore will lead
 			// to Terraform re-assigning default_data_access_config_id

catalog/resource_storage_credential.go

Lines changed: 10 additions & 10 deletions

@@ -10,16 +10,16 @@ import (
 )
 
 type StorageCredentialInfo struct {
-	Name        string                                       `json:"name" tf:"force_new"`
-	Owner       string                                       `json:"owner,omitempty" tf:"computed"`
-	Comment     string                                       `json:"comment,omitempty"`
-	Aws         AwsIamRole                                   `json:"aws_iam_role,omitempty" tf:"group:access"`
-	Azure       *catalog.AzureServicePrincipal               `json:"azure_service_principal,omitempty" tf:"group:access"`
-	AzMI        *catalog.AzureManagedIdentity                `json:"azure_managed_identity,omitempty" tf:"group:access"`
-	GcpSAKey    *GcpServiceAccountKey                        `json:"gcp_service_account_key,omitempty" tf:"group:access"`
-	DBGcpSA     *catalog.DatabricksGcpServiceAccountResponse `json:"databricks_gcp_service_account,omitempty" tf:"computed"`
-	MetastoreID string                                       `json:"metastore_id,omitempty" tf:"computed"`
-	ReadOnly    bool                                         `json:"read_only,omitempty"`
+	Name                        string                                       `json:"name" tf:"force_new"`
+	Owner                       string                                       `json:"owner,omitempty" tf:"computed"`
+	Comment                     string                                       `json:"comment,omitempty"`
+	Aws                         *catalog.AwsIamRole                          `json:"aws_iam_role,omitempty" tf:"group:access"`
+	Azure                       *catalog.AzureServicePrincipal               `json:"azure_service_principal,omitempty" tf:"group:access"`
+	AzMI                        *catalog.AzureManagedIdentity                `json:"azure_managed_identity,omitempty" tf:"group:access"`
+	GcpSAKey                    *GcpServiceAccountKey                        `json:"gcp_service_account_key,omitempty" tf:"group:access"`
+	DatabricksGcpServiceAccount *catalog.DatabricksGcpServiceAccountResponse `json:"databricks_gcp_service_account,omitempty" tf:"computed"`
+	MetastoreID                 string                                       `json:"metastore_id,omitempty" tf:"computed"`
+	ReadOnly                    bool                                         `json:"read_only,omitempty"`
 }
 
 func removeGcpSaField(originalSchema map[string]*schema.Schema) map[string]*schema.Schema {
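
Since `Aws` now points at the Go SDK's `catalog.AwsIamRole`, the SDK's computed fields (`external_id`, `unity_catalog_iam_arn`) become readable from Terraform state. A minimal sketch of surfacing them, assuming the `databricks_storage_credential.external` resource from the guide below, and assuming the block is exposed as a single-element list (the flattened `aws_iam_role.0.*` state keys in the test below suggest it is):

```hcl
output "uc_trust_policy_inputs" {
  // illustrative only: both values are computed by Databricks after the
  // storage credential is created, and feed the IAM role trust policy
  value = {
    external_id           = databricks_storage_credential.external.aws_iam_role[0].external_id
    unity_catalog_iam_arn = databricks_storage_credential.external.aws_iam_role[0].unity_catalog_iam_arn
  }
}
```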

catalog/resource_storage_credential_test.go

Lines changed: 7 additions & 2 deletions

@@ -34,7 +34,8 @@ func TestCreateStorageCredentials(t *testing.T) {
 			Response: catalog.StorageCredentialInfo{
 				Name: "a",
 				AwsIamRole: &catalog.AwsIamRole{
-					RoleArn: "def",
+					RoleArn:    "def",
+					ExternalId: "123",
 				},
 				MetastoreId: "d",
 			},
@@ -49,7 +50,11 @@
 			}
 			comment = "c"
 		`,
-	}.ApplyNoError(t)
+	}.ApplyAndExpectData(t, map[string]any{
+		"aws_iam_role.0.external_id": "123",
+		"aws_iam_role.0.role_arn":    "def",
+		"name":                       "a",
+	})
 }
 
 func TestCreateStorageCredentialWithOwner(t *testing.T) {

docs/guides/unity-catalog.md

Lines changed: 56 additions & 20 deletions

@@ -79,7 +79,6 @@ variable "databricks_client_id" {}
 variable "databricks_client_secret" {}
 variable "databricks_account_id" {}
 variable "databricks_workspace_url" {}
-variable "aws_account_id" {}
 
 variable "tags" {
   default = {}
@@ -196,7 +195,31 @@ Unity Catalog introduces two new objects to access and work with external cloud
 - [databricks_storage_credential](../resources/storage_credential.md) represent authentication methods to access cloud storage (e.g. an IAM role for Amazon S3 or a service principal for Azure Storage). Storage credentials are access-controlled to determine which users can use the credential.
 - [databricks_external_location](../resources/external_location.md) are objects that combine a cloud storage path with a Storage Credential that can be used to access the location.
 
-First, create the required objects in AWS.
+First, we need to create the storage credential in Databricks before creating the IAM role in AWS, because the external ID of the storage credential is required in the IAM role's trust policy.
+
+```hcl
+data "aws_caller_identity" "current" {}
+
+resource "databricks_storage_credential" "external" {
+  provider = databricks.workspace
+  name     = "${local.prefix}-external-access"
+  aws_iam_role {
+    role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${local.prefix}-uc-access" // cannot reference aws_iam_role directly, as it will create a circular dependency
+  }
+  comment = "Managed by TF"
+}
+
+resource "databricks_grants" "external_creds" {
+  provider           = databricks.workspace
+  storage_credential = databricks_storage_credential.external.id
+  grant {
+    principal  = "Data Engineers"
+    privileges = ["CREATE_TABLE"]
+  }
+}
+```
+
+Then we can create the required objects in AWS.
 
 ```hcl
 resource "aws_s3_bucket" "external" {
@@ -222,6 +245,36 @@ resource "aws_s3_bucket_public_access_block" "external" {
   depends_on = [aws_s3_bucket.external]
 }
 
+data "aws_iam_policy_document" "passrole_for_uc" {
+  statement {
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+    principals {
+      identifiers = [databricks_storage_credential.external.aws_iam_role.unity_catalog_iam_arn]
+      type        = "AWS"
+    }
+    condition {
+      test     = "StringEquals"
+      variable = "sts:ExternalId"
+      values   = [databricks_storage_credential.external.aws_iam_role.external_id]
+    }
+  }
+  statement {
+    sid     = "ExplicitSelfRoleAssumption"
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+    principals {
+      type        = "AWS"
+      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
+    }
+    condition {
+      test     = "ArnLike"
+      variable = "aws:PrincipalArn"
+      values   = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${local.prefix}-uc-access"]
+    }
+  }
+}
+
 resource "aws_iam_policy" "external_data_access" {
   // Terraform's "jsonencode" function converts a
   // Terraform expression's result to valid JSON syntax.
@@ -262,26 +315,9 @@ resource "aws_iam_role" "external_data_access" {
 }
 ```
 
-Then create the [databricks_storage_credential](../resources/storage_credential.md) and [databricks_external_location](../resources/external_location.md) in Unity Catalog.
+Then we can create the [databricks_external_location](../resources/external_location.md) in Unity Catalog.
 
 ```hcl
-resource "databricks_storage_credential" "external" {
-  provider = databricks.workspace
-  name     = aws_iam_role.external_data_access.name
-  aws_iam_role {
-    role_arn = aws_iam_role.external_data_access.arn
-  }
-  comment = "Managed by TF"
-}
-
-resource "databricks_grants" "external_creds" {
-  provider           = databricks.workspace
-  storage_credential = databricks_storage_credential.external.id
-  grant {
-    principal  = "Data Engineers"
-    privileges = ["CREATE_TABLE"]
-  }
-}
 
 resource "databricks_external_location" "some" {
   provider = databricks.workspace
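
The last hunk ends at the diff boundary, mid-resource; for reference, a sketch of how the complete external location plausibly continues (the `name` and `url` values are illustrative, not part of this diff):

```hcl
resource "databricks_external_location" "some" {
  provider        = databricks.workspace
  name            = "external"
  url             = "s3://${aws_s3_bucket.external.id}/some"
  credential_name = databricks_storage_credential.external.id
  comment         = "Managed by TF"
}
```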

docs/resources/grants.md

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@ Unlike the [SQL specification](https://docs.databricks.com/sql/language-manual/s
 
 ## Metastore grants
 
-You can grant `CREATE_CATALOG`, `CREATE_CONNECTION`, `CREATE_EXTERNAL_LOCATION`, `CREATE_PROVIDER`, `CREATE_RECIPIENT`, `CREATE_SHARE`, `MANAGE_ALLOWLIST`, `SET_SHARE_PERMISSION`, `USE_MARKETPLACE_ASSETS`, `USE_CONNECTION`, `USE_PROVIDER`, `USE_RECIPIENT` and `USE_SHARE` privileges to [databricks_metastore](metastore.md) id specified in `metastore` attribute.
+You can grant `CREATE_CATALOG`, `CREATE_CONNECTION`, `CREATE_EXTERNAL_LOCATION`, `CREATE_PROVIDER`, `CREATE_RECIPIENT`, `CREATE_SHARE`, `CREATE_STORAGE_CREDENTIAL`, `MANAGE_ALLOWLIST`, `SET_SHARE_PERMISSION`, `USE_MARKETPLACE_ASSETS`, `USE_CONNECTION`, `USE_PROVIDER`, `USE_RECIPIENT` and `USE_SHARE` privileges to [databricks_metastore](metastore.md) id specified in `metastore` attribute.
 
 ```hcl
 resource "databricks_grants" "sandbox" {

docs/resources/metastore_data_access.md

Lines changed: 3 additions & 25 deletions

@@ -3,7 +3,7 @@ subcategory: "Unity Catalog"
 ---
 # databricks_metastore_data_access (Resource)
 
-Optionally, each [databricks_metastore](docs/resources/metastore.md) can have root storage credential defined as `databricks_metastore_data_access`. This will be used by Unity Catalog to access data in the root storage location if defined.
+Optionally, each [databricks_metastore](docs/resources/metastore.md) can have a default [databricks_storage_credential](storage_credential.md) defined as `databricks_metastore_data_access`. This will be used by Unity Catalog to access data in the root storage location if defined.
 
 ## Example Usage
 
@@ -53,31 +53,9 @@ resource "databricks_metastore_data_access" "this" {
 
 ## Argument Reference
 
-The following arguments are required:
+The arguments are the same as for [databricks_storage_credential](storage_credential.md). Additionally:
 
-* `name` - Name of Data Access Configuration, which must be unique within the [databricks_metastore](metastore.md). Change forces creation of a new resource.
-* `metastore_id` - Unique identifier of the parent Metastore
-* `owner` - (Optional) Username/groupname/sp application_id of the data access configuration owner.
-* `force_destroy` - (Optional) Delete the data access configuration regardless of its dependencies.
-
-`aws_iam_role` optional configuration block for credential details for AWS:
-
-* `role_arn` - The Amazon Resource Name (ARN) of the AWS IAM role for S3 data access, of the form `arn:aws:iam::1234567890:role/MyRole-AJJHDSKSDF`
-
-`azure_managed_identity` optional configuration block for using managed identity as credential details for Azure (Recommended):
-
-* `access_connector_id` - The Resource ID of the Azure Databricks Access Connector resource, of the form `/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg-name/providers/Microsoft.Databricks/accessConnectors/connector-name`.
-* `managed_identity_id` - (Optional) The Resource ID of the Azure User Assigned Managed Identity associated with Azure Databricks Access Connector, of the form `/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/rg-name/providers/Microsoft.ManagedIdentity/userAssignedIdentities/user-managed-identity-name`.
-
-`databricks_gcp_service_account` optional configuration block for creating a Databricks-managed GCP Service Account:
-
-* `email` (output only) - The email of the GCP service account created, to be granted access to relevant buckets.
-
-`azure_service_principal` optional configuration block for credential details for Azure (Legacy):
-
-* `directory_id` - The directory ID corresponding to the Azure Active Directory (AAD) tenant of the application
-* `application_id` - The application ID of the application registration within the referenced AAD tenant
-* `client_secret` - The client secret generated for the above app ID in AAD. **This field is redacted on output**
+* `is_default` - whether to set this credential as the default for the metastore. In practice, this should always be true.
 
 ## Attribute Reference
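
Since the resource now shares the storage credential schema, a minimal sketch using the new `is_default` flag, assuming placeholder `databricks_metastore.this` and `aws_iam_role.metastore_data_access` resources:

```hcl
resource "databricks_metastore_data_access" "this" {
  metastore_id = databricks_metastore.this.id // placeholder metastore
  name         = aws_iam_role.metastore_data_access.name
  aws_iam_role {
    role_arn = aws_iam_role.metastore_data_access.arn // placeholder IAM role
  }
  // the default credential is what Unity Catalog uses for the root storage location
  is_default = true
}
```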

docs/resources/storage_credential.md

Lines changed: 3 additions & 2 deletions

@@ -74,11 +74,14 @@ The following arguments are required:
 - `name` - Name of Storage Credentials, which must be unique within the [databricks_metastore](metastore.md). Change forces creation of a new resource.
 - `metastore_id` - (Required for account-level) Unique identifier of the parent Metastore
 - `owner` - (Optional) Username/groupname/sp application_id of the storage credential owner.
+- `read_only` - (Optional) Indicates whether the storage credential is only usable for read operations.
 - `force_destroy` - (Optional) Delete storage credential regardless of its dependencies.
 
 `aws_iam_role` optional configuration block for credential details for AWS:
 
 - `role_arn` - The Amazon Resource Name (ARN) of the AWS IAM role for S3 data access, of the form `arn:aws:iam::1234567890:role/MyRole-AJJHDSKSDF`
+- `external_id` (output only) - The external ID used in role assumption to prevent the confused deputy problem.
+- `unity_catalog_iam_arn` (output only) - The Amazon Resource Name (ARN) of the AWS IAM user managed by Databricks. This is the identity that will assume the AWS IAM role.
 
 `azure_managed_identity` optional configuration block for using managed identity as credential details for Azure (recommended over service principal):
 
@@ -90,8 +93,6 @@ The following arguments are required:
 
 - `email` (output only) - The email of the GCP service account created, to be granted access to relevant buckets.
 
-- `read_only` - (Optional) Indicates whether the storage credential is only usable for read operations.
-
 `azure_service_principal` optional configuration block to use service principal as credential details for Azure (Legacy):
 
 - `directory_id` - The directory ID corresponding to the Azure Active Directory (AAD) tenant of the application
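
A usage sketch for the relocated `read_only` argument (the credential name and referenced IAM role are illustrative):

```hcl
resource "databricks_storage_credential" "reader" {
  name = "read-only-credential"
  aws_iam_role {
    role_arn = aws_iam_role.reader.arn // placeholder IAM role
  }
  read_only = true // credential can only be used for read operations
  comment   = "Managed by TF"
}
```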
