
Commit 6f29524

Added databricks_mws_log_delivery resource for billing & audit logs (#343)

* Initial commit for log delivery resource
* Applied review comments and added unit tests
* Added integration tests for log delivery resource
* Added changelog

Co-authored-by: Serge Smertin <[email protected]>

1 parent 2b03da5 commit 6f29524

11 files changed, +698 −7 lines changed


.github/ISSUE_TEMPLATE/provider-issue.md

Lines changed: 6 additions & 0 deletions
@@ -32,6 +32,12 @@ To get relevant environment variable _names_ please copypaste the output of the
 ### Debug Output
 Please add turn on logging, e.g. `TF_LOG=DEBUG terraform apply` and run command again, paste it to gist & provide the link to gist. If you're still willing to paste in log output, make sure you provide only relevant log lines with requests.
 
+It would make it more readable, if you pipe the log through `| grep databricks | sed -E 's/^.* plugin[^:]+: (.*)$/\1/'`, e.g.:
+
+```
+TF_LOG=DEBUG terraform plan 2>&1 | grep databricks | sed -E 's/^.* plugin[^:]+: (.*)$/\1/'
+```
+
 ### Panic Output
 If Terraform produced a panic, please provide a link to a GitHub Gist containing the output of the `crash.log`.
 

CHANGELOG.md

Lines changed: 13 additions & 3 deletions
@@ -2,9 +2,19 @@
 
 ## 0.2.8
 
-* Added [Azure Key Vault support](https://github.com/databrickslabs/terraform-provider-databricks/pull/381) for databricks_secret_scope for Azure CLI authenticated users
-* Added support for pinning clusters (issue #113)
-* Internal: API for retrieval of the cluster events
+* Added [databricks_mws_log_delivery](https://github.com/databrickslabs/terraform-provider-databricks/pull/343) resource for billable usage & audit logs consumption.
+* Added [databricks_node_type](https://github.com/databrickslabs/terraform-provider-databricks/pull/376) data source for simpler selection of node types across AWS & Azure.
+* Added [Azure Key Vault support](https://github.com/databrickslabs/terraform-provider-databricks/pull/381) for databricks_secret_scope for Azure CLI authenticated users.
+* Added [is_pinned](https://github.com/databrickslabs/terraform-provider-databricks/pull/348) support for `databricks_cluster` resource.
+* Internal: API for retrieval of the cluster events.
+
+Updated dependency versions:
+
+* github.com/Azure/go-autorest/autorest v0.11.9
+* github.com/Azure/go-autorest/autorest/adal v0.9.5
+* github.com/Azure/go-autorest/autorest/azure/auth v0.5.3
+* github.com/Azure/go-autorest/autorest/azure/cli v0.4.2
+* gopkg.in/ini.v1 1.62.0
 
 ## 0.2.7
 
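As a hedged illustration of the `databricks_node_type` data source mentioned in the changelog entry above, typical usage looks roughly like the sketch below. The `local_disk` argument and the exported `id` attribute are assumptions based on the data source's documentation, not something this diff shows.

```hcl
# Hedged sketch: pick the smallest node type with a local disk and use it in a
# cluster definition. The `local_disk` argument and `id` attribute are assumed.
data "databricks_node_type" "smallest" {
  local_disk = true
}

resource "databricks_cluster" "shared_autoscaling" {
  cluster_name            = "Shared Autoscaling"
  spark_version           = "6.6.x-scala2.11"
  node_type_id            = data.databricks_node_type.smallest.id
  autotermination_minutes = 20
  autoscale {
    min_workers = 1
    max_workers = 4
  }
}
```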

README.md

Lines changed: 2 additions & 0 deletions
@@ -25,9 +25,11 @@ End-to-end workspace creation on [AWS](scripts/awsmt-integration) or [Azure](scr
 | [databricks_job](docs/resources/job.md)
 | [databricks_mws_credentials](docs/resources/mws_credentials.md)
 | [databricks_mws_customer_managed_keys](docs/resources/mws_customer_managed_keys.md)
+| [databricks_mws_log_delivery](docs/resources/mws_log_delivery.md)
 | [databricks_mws_networks](docs/resources/mws_networks.md)
 | [databricks_mws_storage_configurations](docs/resources/mws_storage_configurations.md)
 | [databricks_mws_workspaces](docs/resources/mws_workspaces.md)
+| [databricks_node_type](docs/data-sources/node_type.md) data
 | [databricks_notebook](docs/resources/notebook.md)
 | [databricks_notebook](docs/data-sources/notebook.md) data
 | [databricks_notebook_paths](docs/data-sources/notebook_paths.md) data

access/data_aws_policies.go

Lines changed: 16 additions & 2 deletions
@@ -153,7 +153,7 @@ func DataAwsAssumeRolePolicy() *schema.Resource {
 	return &schema.Resource{
 		Read: func(d *schema.ResourceData, m interface{}) error {
 			externalID := d.Get("external_id").(string)
-			policyJSON, err := json.MarshalIndent(awsIamPolicy{
+			policy := awsIamPolicy{
 				Version: "2008-10-17",
 				Statements: []*awsIamPolicyStatement{
 					{
@@ -169,7 +169,15 @@
 						},
 					},
 				},
-			}, "", " ")
+			}
+			if v, ok := d.GetOk("for_log_delivery"); ok {
+				if v.(bool) {
+					// this is production UsageDelivery IAM role, that is considered a constant
+					logDeliveryARN := "arn:aws:iam::414351767826:role/SaasUsageDeliveryRole-prod-IAMRole-3PLHICCRR1TK"
+					policy.Statements[0].Principal["AWS"] = logDeliveryARN
+				}
+			}
+			policyJSON, err := json.MarshalIndent(policy, "", " ")
 			if err != nil {
 				return err
 			}
@@ -182,6 +190,12 @@
 			Default:  "414351767826",
 			Optional: true,
 		},
+		"for_log_delivery": {
+			Type:        schema.TypeBool,
+			Description: "Grant AssumeRole to Databricks SaasUsageDeliveryRole instead of root account",
+			Optional:    true,
+			Default:     false,
+		},
 		"external_id": {
 			Type:     schema.TypeString,
 			Required: true,

docs/data-sources/aws_assume_role_policy.md

Lines changed: 1 addition & 0 deletions
@@ -46,6 +46,7 @@ resource "databricks_mws_credentials" "this" {
 ## Argument Reference
 
 * `external_id` (Required) (String) External ID that can be found at http://accounts.cloud.databricks.com/#aws
+* `for_log_delivery` (Optional) Whether this assume role policy should be created for usage log delivery. Defaults to false.
 
 ## Attribute Reference
 
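As a hedged sketch of how the new `for_log_delivery` flag is meant to be used (mirroring the example added to docs/resources/mws_log_delivery.md in this commit), the data source feeds the trust policy of the IAM role that Databricks assumes for log delivery:

```hcl
# Trust policy that lets Databricks' UsageDelivery role (rather than the
# Databricks root account) assume the log-writer role.
data "databricks_aws_assume_role_policy" "logdelivery" {
  external_id      = var.account_id # Databricks account ID
  for_log_delivery = true
}

resource "aws_iam_role" "logdelivery" {
  name               = "${var.prefix}-logdelivery"
  assume_role_policy = data.databricks_aws_assume_role_policy.logdelivery.json
}
```

With `for_log_delivery = true`, the generated policy's principal is the production SaasUsageDeliveryRole ARN shown in the access/data_aws_policies.go change above.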

docs/resources/mws_log_delivery.md

Lines changed: 140 additions & 0 deletions
@@ -0,0 +1,140 @@ (new file)

# databricks_mws_log_delivery Resource

-> **Note** This resource has an evolving API, which may change in future versions of the provider.

This resource configures the delivery of the two supported log types from Databricks workspaces: [billable usage logs](https://docs.databricks.com/administration-guide/account-settings/billable-usage-delivery.html) and [audit logs](https://docs.databricks.com/administration-guide/account-settings/audit-logs.html). You cannot delete a log delivery configuration, but you can disable it when you no longer need it. This matters because there is a limit on the number of enabled log delivery configurations per account: you can create at most two enabled configurations that apply at the account level *(without a workspace filter)* and two that use a workspace filter. There is an additional uniqueness constraint: two enabled configurations cannot share all of their fields (excluding `config_name`). Re-enabling a configuration may fail if it would violate the limit or uniqueness constraints.

## Example Usage

End-to-end example of usage and audit log delivery:

```hcl
resource "aws_s3_bucket" "logdelivery" {
  bucket = "${var.prefix}-logdelivery"
  acl    = "private"
  versioning {
    enabled = false
  }
  force_destroy = true
  tags = merge(var.tags, {
    Name = "${var.prefix}-logdelivery"
  })
}

resource "aws_s3_bucket_public_access_block" "logdelivery" {
  bucket             = aws_s3_bucket.logdelivery.id
  ignore_public_acls = true
}

data "databricks_aws_assume_role_policy" "logdelivery" {
  external_id      = var.account_id
  for_log_delivery = true
}

resource "aws_iam_role" "logdelivery" {
  name               = "${var.prefix}-logdelivery"
  description        = "(${var.prefix}) UsageDelivery role"
  assume_role_policy = data.databricks_aws_assume_role_policy.logdelivery.json
  tags               = var.tags
}

data "databricks_aws_bucket_policy" "logdelivery" {
  full_access_role = aws_iam_role.logdelivery.arn
  bucket           = aws_s3_bucket.logdelivery.bucket
}

resource "aws_s3_bucket_policy" "logdelivery" {
  bucket = aws_s3_bucket.logdelivery.id
  policy = data.databricks_aws_bucket_policy.logdelivery.json
}

resource "databricks_mws_credentials" "log_writer" {
  account_id       = var.account_id
  credentials_name = "Usage Delivery"
  role_arn         = aws_iam_role.logdelivery.arn
}

resource "databricks_mws_storage_configurations" "log_bucket" {
  account_id                 = var.account_id
  storage_configuration_name = "Usage Logs"
  bucket_name                = aws_s3_bucket.logdelivery.bucket
}

resource "databricks_mws_log_delivery" "usage_logs" {
  account_id               = var.account_id
  credentials_id           = databricks_mws_credentials.log_writer.credentials_id
  storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id
  delivery_path_prefix     = "billable-usage"
  config_name              = "Usage Logs"
  log_type                 = "BILLABLE_USAGE"
  output_format            = "CSV"
}

resource "databricks_mws_log_delivery" "audit_logs" {
  account_id               = var.account_id
  credentials_id           = databricks_mws_credentials.log_writer.credentials_id
  storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id
  delivery_path_prefix     = "audit-logs"
  config_name              = "Audit Logs"
  log_type                 = "AUDIT_LOGS"
  output_format            = "JSON"
}
```

## Billable Usage

CSV files with [static schema](https://docs.databricks.com/administration-guide/account-settings/usage.html) are delivered to `<delivery_path_prefix>/billable-usage/csv/`. Files are named `workspaceId=<workspace-id>-usageMonth=<month>.csv` and are delivered daily by overwriting the month's CSV file for each workspace.

```hcl
resource "databricks_mws_log_delivery" "usage_logs" {
  account_id               = var.account_id
  credentials_id           = databricks_mws_credentials.log_writer.credentials_id
  storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id
  delivery_path_prefix     = "billable-usage"
  config_name              = "Usage Logs"
  log_type                 = "BILLABLE_USAGE"
  output_format            = "CSV"
}
```

## Audit Logs

JSON files with [static schema](https://docs.databricks.com/administration-guide/account-settings/audit-logs.html#audit-log-schema) are delivered to `<delivery_path_prefix>/workspaceId=<workspaceId>/date=<yyyy-mm-dd>/auditlogs_<internal-id>.json`. Audit logs are available within 15 minutes of activation. New JSON files are delivered every few minutes, potentially overwriting existing files for each workspace; occasionally data may arrive later than 15 minutes. Databricks can overwrite the delivered log files in your bucket at any time. If a file is overwritten, the existing content remains, but additional lines may be present for more auditable events. Overwriting ensures exactly-once semantics without requiring read or delete access to your account.

```hcl
resource "databricks_mws_log_delivery" "audit_logs" {
  account_id               = var.account_id
  credentials_id           = databricks_mws_credentials.log_writer.credentials_id
  storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id
  delivery_path_prefix     = "audit-logs"
  config_name              = "Audit Logs"
  log_type                 = "AUDIT_LOGS"
  output_format            = "JSON"
}
```

## Argument reference

* `account_id` - The Databricks account ID that hosts the log delivery configuration.
* `config_name` - An optional human-readable name for the log delivery configuration. Defaults to empty.
* `log_type` - The type of log delivery. `BILLABLE_USAGE` and `AUDIT_LOGS` are supported.
* `output_format` - The file type of log delivery. Currently `CSV` (for `BILLABLE_USAGE`) and `JSON` (for `AUDIT_LOGS`) are supported.
* `credentials_id` - The ID for a Databricks [credential configuration](mws_credentials.md) that represents the AWS IAM role [with policy](../data-sources/aws_assume_role_policy.md) and [trust relationship](../data-sources/aws_assume_role_policy.md) as described in the main billable usage documentation page.
* `storage_configuration_id` - The ID for a Databricks [storage configuration](mws_storage_configurations.md) that represents the S3 bucket with [bucket policy](../data-sources/aws_bucket_policy.md) as described in the main billable usage documentation page.
* `workspace_ids_filter` - (Optional) By default, this log configuration applies to all workspaces associated with your account ID. If your account is on the E2 version of the platform or on a select custom plan that allows multiple workspaces per account, you can optionally set this field to an array of workspace IDs so that the configuration applies only to those workspaces. If you plan to use different log delivery configurations for different workspaces, set this field explicitly rather than leaving it blank; if you leave it blank and your account later gains additional workspaces, the configuration will also apply to the new workspaces.
* `delivery_path_prefix` - (Optional) Defaults to empty, which means that logs are delivered to the root of the bucket. The value must be a valid S3 object key and must not start or end with a slash character.
* `delivery_start_time` - (Optional) The start month and year for delivery, specified in YYYY-MM format. Defaults to the current year and month. Usage data is not available before 2019-03.

## Attribute reference

The resource exports the following attributes:

* `config_id` - Databricks log delivery configuration ID.

## Import

This resource can be imported by specifying a combination of an account ID and log config ID separated by `|`:

```bash
$ terraform import databricks_mws_log_delivery.usage "<account-id>|<log-config-id>"
```
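
Following the documentation diff above, here is a hedged sketch of the `workspace_ids_filter` argument, which the examples in the new page do not cover; the `databricks_mws_workspaces` reference and its `workspace_id` attribute are used purely for illustration:

```hcl
# Hedged sketch: scope an audit log configuration to specific workspaces
# instead of the whole account. Any list of numeric workspace IDs works here.
resource "databricks_mws_log_delivery" "audit_logs_filtered" {
  account_id               = var.account_id
  credentials_id           = databricks_mws_credentials.log_writer.credentials_id
  storage_configuration_id = databricks_mws_storage_configurations.log_bucket.storage_configuration_id
  delivery_path_prefix     = "audit-logs"
  config_name              = "Audit Logs (selected workspaces)"
  log_type                 = "AUDIT_LOGS"
  output_format            = "JSON"
  workspace_ids_filter     = [databricks_mws_workspaces.this.workspace_id]
}
```
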
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@ (new Go acceptance test file)

package acceptance

import (
	"os"
	"testing"

	"github.com/databrickslabs/databricks-terraform/internal/acceptance"
	"github.com/databrickslabs/databricks-terraform/internal/qa"
	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
)

func TestMwsAccLogDelivery(t *testing.T) {
	if os.Getenv("CLOUD_ENV") != "MWS" {
		t.Skip("Cannot run test on non-MWS environment")
	}
	acceptance.AccTest(t, resource.TestCase{
		Steps: []resource.TestStep{
			{
				Config: qa.EnvironmentTemplate(t, `
				provider "databricks" {
					host     = "{env.DATABRICKS_HOST}"
					username = "{env.DATABRICKS_USERNAME}"
					password = "{env.DATABRICKS_PASSWORD}"
				}

				resource "databricks_mws_credentials" "ld" {
					account_id       = "{env.DATABRICKS_ACCOUNT_ID}"
					credentials_name = "tf-acceptance-logdelivery-{var.RANDOM}"
					role_arn         = "{env.TEST_LOGDELIVERY_ARN}"
				}

				resource "databricks_mws_storage_configurations" "ld" {
					account_id                 = "{env.DATABRICKS_ACCOUNT_ID}"
					storage_configuration_name = "tf-acceptance-logdelivery-{var.RANDOM}"
					bucket_name                = "{env.TEST_LOGDELIVERY_BUCKET}"
				}

				resource "databricks_mws_log_delivery" "usage_logs" {
					account_id               = "{env.DATABRICKS_ACCOUNT_ID}"
					credentials_id           = databricks_mws_credentials.ld.credentials_id
					storage_configuration_id = databricks_mws_storage_configurations.ld.storage_configuration_id
					delivery_path_prefix     = "tf-{var.RANDOM}/billable-usage"
					config_name              = "Usage {var.RANDOM}"
					log_type                 = "BILLABLE_USAGE"
					output_format            = "CSV"
				}

				resource "databricks_mws_log_delivery" "audit_logs" {
					account_id               = "{env.DATABRICKS_ACCOUNT_ID}"
					credentials_id           = databricks_mws_credentials.ld.credentials_id
					storage_configuration_id = databricks_mws_storage_configurations.ld.storage_configuration_id
					delivery_path_prefix     = "tf-{var.RANDOM}/audit-logs"
					config_name              = "Audit {var.RANDOM}"
					log_type                 = "AUDIT_LOGS"
					output_format            = "JSON"
				}`),
			},
		},
	})
}
