Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions demo/conf/onboarding.template
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_cluster_by":["dob"],
"bronze_cluster_by_auto": true,
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/customers.json",
"bronze_catalog_quarantine_prod": "{uc_catalog_name}",
"bronze_database_quarantine_prod": "{bronze_schema}",
"bronze_quarantine_table": "customers_quarantine",
"bronze_quarantine_table_comment": "customers quarantine table",
"bronze_quarantine_table_path_prod": "{uc_volume_path}/data/bronze/customers_quarantine",
"bronze_quarantine_table_cluster_by": ["dob"],
"bronze_quarantine_table_cluster_by_auto": "true",
"silver_catalog_prod": "{uc_catalog_name}",
"silver_database_prod": "{silver_schema}",
"silver_table": "customers",
Expand All @@ -46,7 +47,8 @@
"_rescued_data"
]
},
"silver_cluster_by":["customer_id"],
"silver_cluster_by":["dob"],
"silver_cluster_by_auto": true,
"silver_transformation_json_prod": "{uc_volume_path}/demo/conf/silver_transformations.json",
"silver_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/customers_silver_dqe.json"

Expand All @@ -73,17 +75,19 @@
"header": "true"
},
"bronze_cluster_by":["transaction_date"],
"bronze_cluster_by_auto": true,
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/transactions.json",
"bronze_database_quarantine_prod": "{uc_catalog_name}.{bronze_schema}",
"bronze_quarantine_table": "transactions_quarantine",
"bronze_quarantine_table_comment": "transactions bronze quarantine table",
"bronze_quarantine_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/transactions_quarantine",
"bronze_quarantine_table_cluster_by": ["transaction_date"],
"bronze_quarantine_table_cluster_by_auto": "true",
"silver_catalog_prod": "{uc_catalog_name}",
"silver_database_prod": "{silver_schema}",
"silver_table": "transactions",
"silver_table_comment": "transactions silver table",
"silver_table_path_prod": "{uc_volume_path}/data/silver/transactions",
"silver_table_path_prod": "{uc_volume_path}/demo/resources/data/silver/transactions",
"silver_cdc_apply_changes": {
"keys": [
"transaction_id"
Expand All @@ -97,8 +101,7 @@
"_rescued_data"
]
},
"silver_cluster_by":["transaction_date"],
"silver_table_path_prod": "{uc_volume_path}/demo/resources/data/silver/transactions",
"silver_cluster_by_auto": "true",
"silver_transformation_json_prod": "{uc_volume_path}/demo/conf/silver_transformations.json",
"silver_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/transactions_silver_dqe.json"
},
Expand All @@ -117,24 +120,25 @@
"bronze_database_prod": "{bronze_schema}",
"bronze_table": "products",
"bronze_table_comment": "products bronze table",
"bronze_table_path_prod": "{uc_volume_path}/data/bronze/products",
"bronze_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/products",
"bronze_reader_options": {
"cloudFiles.format": "csv",
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/products",
"bronze_cluster_by_auto": true,
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/products.json",
"bronze_database_quarantine_prod": "{uc_catalog_name}.{bronze_schema}",
"bronze_quarantine_table": "products_quarantine",
"bronze_quarantine_table_comment": "products quarantine bronze table",
"bronze_quarantine_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/products_quarantine",
"bronze_quarantine_table_cluster_by": ["product_id"],
"bronze_quarantine_table_cluster_by_auto": "true",
"silver_catalog_prod": "{uc_catalog_name}",
"silver_database_prod": "{silver_schema}",
"silver_table": "products",
"silver_table_comment": "products silver table",
"silver_table_path_prod": "{uc_volume_path}/data/silver/products",
"silver_table_path_prod": "{uc_volume_path}/demo/resources/data/silver/products",
"silver_cdc_apply_changes": {
"keys": [
"product_id"
Expand All @@ -148,7 +152,7 @@
"_rescued_data"
]
},
"silver_table_path_prod": "{uc_volume_path}/demo/resources/data/silver/products",
"silver_cluster_by_auto": "true",
"silver_transformation_json_prod": "{uc_volume_path}/demo/conf/silver_transformations.json",
"silver_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/products_silver_dqe.json"
},
Expand All @@ -167,13 +171,12 @@
"bronze_database_prod": "{bronze_schema}",
"bronze_table": "stores",
"bronze_table_comment": "stores bronze table",
"bronze_table_path_prod": "{uc_volume_path}/data/bronze/stores",
"bronze_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/stores",
"bronze_reader_options": {
"cloudFiles.format": "csv",
"cloudFiles.rescuedDataColumn": "_rescued_data",
"header": "true"
},
"bronze_table_path_prod": "{uc_volume_path}/demo/resources/data/bronze/stores",
"bronze_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/stores.json",
"bronze_catalog_quarantine_prod": "{uc_catalog_name}",
"bronze_database_quarantine_prod": "{bronze_schema}",
Expand All @@ -185,7 +188,7 @@
"silver_database_prod": "{silver_schema}",
"silver_table": "stores",
"silver_table_comment": "stores silver table",
"silver_table_path_prod": "{uc_volume_path}/data/silver/stores",
"silver_table_path_prod": "{uc_volume_path}/demo/resources/data/silver/stores",
"silver_cdc_apply_changes": {
"keys": [
"store_id"
Expand All @@ -199,7 +202,6 @@
"_rescued_data"
]
},
"silver_table_path_prod": "{uc_volume_path}/demo/resources/data/silver/stores",
"silver_transformation_json_prod": "{uc_volume_path}/demo/conf/silver_transformations.json",
"silver_data_quality_expectations_json_prod": "{uc_volume_path}/demo/conf/dqe/stores_silver_dqe.json"
}
Expand Down
2 changes: 2 additions & 0 deletions docs/content/getting_started/metadatapreperation.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ The `onboarding.json` file contains links to [silver_transformations.json](https
| bronze_reader_options | Reader options that can be provided to the Spark reader, in JSON format <br> e.g. multiline=true, header=true |
| bronze_partition_columns | Bronze table partition columns list |
| bronze_cluster_by | List of columns to cluster the bronze table by |
| bronze_cluster_by_auto | Enable automatic liquid clustering on the bronze table. Boolean value (true/false). Can be combined with bronze_cluster_by to define initial clustering keys. See [Automatic liquid clustering](https://docs.databricks.com/aws/en/delta/clustering#auto-liquid) |
| bronze_cdc_apply_changes | Bronze cdc apply changes Json |
| bronze_apply_changes_from_snapshot | Bronze apply changes from snapshot JSON. Mandatory fields: keys=["userId"], scd_type=`1` or `2`. Optional fields: track_history_column_list=`[col1]`, track_history_except_column_list=`[col2]` |
| bronze_table_path_{env} | Bronze table storage path.|
Expand All @@ -57,6 +58,7 @@ The `onboarding.json` file contains links to [silver_transformations.json](https
| silver_table_comment | Silver table comments |
| silver_partition_columns | Silver table partition columns list |
| silver_cluster_by | List of columns to cluster the silver table by |
| silver_cluster_by_auto | Enable automatic liquid clustering on the silver table. Boolean value (true/false). Can be combined with silver_cluster_by to define initial clustering keys. See [Automatic liquid clustering](https://docs.databricks.com/aws/en/delta/clustering#auto-liquid) |
| silver_cdc_apply_changes | Silver cdc apply changes Json |
| silver_table_path_{env} | Silver table storage path. |
| silver_table_properties | Lakeflow Declarative Pipeline table properties map. e.g. `{"pipelines.autoOptimize.managed": "false" , "pipelines.autoOptimize.zOrderCols": "year,month", "pipelines.reset.allowed": "false"}` |
Expand Down
9 changes: 9 additions & 0 deletions docs/content/releases/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ date: 2021-08-04T14:50:11-04:00
weight: 80
draft: false
---
# v0.0.11

## Enhancements
- Added automatic liquid clustering support (`cluster_by_auto`) for bronze and silver tables [Issue #238](https://github.com/databrickslabs/dlt-meta/issues/238)
- Enables automatic liquid clustering on streaming tables via the `cluster_by_auto` parameter
- Can be combined with `cluster_by` to define initial clustering keys followed by automatic optimization
- Supported for both bronze and silver layer tables
- See [Automatic liquid clustering documentation](https://docs.databricks.com/aws/en/delta/clustering#auto-liquid) for more details

# v0.0.10

## ⚠️ Breaking Changes
Expand Down
18 changes: 10 additions & 8 deletions integration_tests/conf/cloudfiles-onboarding.template
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
"bronze_table_properties": {
"pipelines.autoOptimize.managed": "true"
},
"bronze_cluster_by": ["id", "email"],
"bronze_data_quality_expectations_json_it": "{uc_volume_path}/integration_tests/conf/dqe/customers/bronze_data_quality_expectations.json",
"bronze_catalog_quarantine_it": "{uc_catalog_name}",
"bronze_database_quarantine_it": "{bronze_schema}",
Expand All @@ -42,6 +41,8 @@
"pipelines.reset.allowed": "false"
},
"bronze_quarantine_table_cluster_by":["id", "email"],
"bronze_quarantine_table_cluster_by_auto": true,
"bronze_cluster_by_auto": "true",
"bronze_append_flows": [
{
"name": "customer_bronze_flow",
Expand All @@ -68,15 +69,15 @@
"silver_table_properties": {
"pipelines.reset.allowed": "false"
},
"silver_table_cluster_by":["customer_id"],
"silver_data_quality_expectations_json_it": "{uc_volume_path}/integration_tests/conf/dqe/customers/silver_data_quality_expectations.json",
"silver_catalog_quarantine_it":"{uc_catalog_name}",
"silver_database_quarantine_it":"{silver_schema}",
"silver_quarantine_table":"customers_quarantine",
"silver_quarantine_table_properties": {
"pipelines.reset.allowed": "false"
},
"silver_cluster_by":["id", "email"],
"silver_quarantine_table_cluster_by_auto": true,
"silver_cluster_by_auto": "true",
"silver_append_flows": [
{
"name": "customers_silver_flow",
Expand Down Expand Up @@ -120,7 +121,6 @@
"bronze_table_properties": {
"pipelines.reset.allowed": "true"
},
"bronze_table_cluster_by":["id", "customer_id"],
"bronze_data_quality_expectations_json_it": "{uc_volume_path}/integration_tests/conf/dqe/transactions/bronze_data_quality_expectations.json",
"bronze_catalog_quarantine_it": "{uc_catalog_name}",
"bronze_database_quarantine_it": "{bronze_schema}",
Expand All @@ -129,8 +129,9 @@
"bronze_quarantine_table_properties": {
"pipelines.reset.allowed": "true",
"pipelines.autoOptimize.managed": "false"
},
"bronze_quarantine_table_cluster_by": ["id", "customer_id"],
},
"bronze_quarantine_table_cluster_by_auto": true,
"bronze_cluster_by_auto": "true",
"bronze_append_flows": [
{
"name": "transactions_bronze_flow",
Expand Down Expand Up @@ -171,10 +172,11 @@
"silver_table_properties": {
"pipelines.reset.allowed": "false"
},
"silver_cluster_by":["id", "customer_id"],
"silver_cluster_by_auto": "true",
"silver_catalog_quarantine_it":"{uc_catalog_name}",
"silver_database_quarantine_it":"{silver_schema}",
"silver_quarantine_table":"transactions_quarantine",
"silver_quarantine_table_cluster_by":["id","customer_id"]
"silver_quarantine_table_cluster_by":["id","customer_id"],
"silver_quarantine_table_cluster_by_auto": true
}
]
6 changes: 4 additions & 2 deletions integration_tests/conf/cloudfiles-onboarding_A2.template
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
"bronze_table_properties": {
"pipelines.autoOptimize.managed": "true"
},
"bronze_cluster_by":["id", "email"],
"bronze_cluster_by":["id", "email"],
"bronze_cluster_by_auto": true,
"bronze_data_quality_expectations_json_it": "{uc_volume_path}/integration_tests/conf/dqe/customers/bronze_data_quality_expectations.json",
"bronze_catalog_quarantine_it": "{uc_catalog_name}",
"bronze_database_quarantine_it": "{bronze_schema}",
Expand All @@ -37,6 +38,7 @@
"bronze_quarantine_table_properties": {
"pipelines.reset.allowed": "false"
},
"bronze_quarantine_table_cluster_by":["id", "email"]
"bronze_quarantine_table_cluster_by":["id", "email"],
"bronze_quarantine_table_cluster_by_auto": true
}
]
2 changes: 2 additions & 0 deletions integration_tests/conf/eventhub-onboarding.template
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@
"bronze_database_it": "{bronze_schema}",
"bronze_table": "bronze_{run_id}_iot",
"bronze_partition_columns": "date",
"bronze_cluster_by_auto": true,
"bronze_data_quality_expectations_json_it": "{uc_volume_path}/integration_tests/conf/dqe/iot/bronze_data_quality_expectations.json",
"bronze_catalog_quarantine_it": "{uc_catalog_name}",
"bronze_database_quarantine_it": "{bronze_schema}",
"bronze_quarantine_table": "bronze_{run_id}_iot_quarantine",
"bronze_quarantine_table_path_it": "{uc_volume_path}/data/bronze/iot_quarantine",
"bronze_quarantine_table_cluster_by_auto": true,
"bronze_sinks": [
{
"name": "bronze_eventhub_sink",
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/conf/kafka-onboarding.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
"bronze_table": "bronze_{run_id}_iot",
"bronze_partition_columns": "date",
"bronze_table_path_it": "{uc_volume_path}/data/bronze/iot",
"bronze_cluster_by_auto": true,
"bronze_data_quality_expectations_json_it": "{uc_volume_path}/integration_tests/conf/dqe/iot/bronze_data_quality_expectations.json",
"bronze_catalog_quarantine_it": "{uc_catalog_name}",
"bronze_database_quarantine_it": "{bronze_schema}",
"bronze_quarantine_table": "bronze_{run_id}_iot_quarantine",
"bronze_quarantine_table_path_it": "{uc_volume_path}/data/bronze/iot_quarantine",
"bronze_quarantine_table_cluster_by_auto": true,
"bronze_sinks": [
{
"name": "bronze_customer_kafka_sink1",
Expand Down
4 changes: 4 additions & 0 deletions integration_tests/conf/snapshot-onboarding.template
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
},
"bronze_database_it": "{uc_catalog_name}.{bronze_schema}",
"bronze_table": "products",
"bronze_cluster_by_auto": true,
"bronze_apply_changes_from_snapshot": {
"keys": [
"product_id"
Expand All @@ -23,6 +24,7 @@
"silver_database_it": "{silver_schema}",
"silver_table": "products",
"silver_table_comment": "products silver table",
"silver_cluster_by_auto": true,
"silver_apply_changes_from_snapshot":{
"keys": [
"product_id"
Expand All @@ -45,6 +47,7 @@
},
"bronze_database_it": "{uc_catalog_name}.{bronze_schema}",
"bronze_table": "stores",
"bronze_cluster_by_auto": true,
"bronze_apply_changes_from_snapshot": {
"keys": [
"store_id"
Expand All @@ -54,6 +57,7 @@
"silver_catalog_it": "{uc_catalog_name}",
"silver_database_it": "{silver_schema}",
"silver_table": "stores",
"silver_cluster_by_auto": true,
"silver_apply_changes_from_snapshot":{
"keys": [
"store_id"
Expand Down
3 changes: 2 additions & 1 deletion integration_tests/run_integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import uuid
import webbrowser
from dataclasses import dataclass
from datetime import timedelta

# Add project root to Python path
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
Expand Down Expand Up @@ -799,7 +800,7 @@ def launch_workflow(self, runner_conf: DLTMetaRunnerConf):
f"{self.ws.config.host}/jobs/{created_job.job_id}?o={self.ws.get_workspace_id()}"
)
print(f"Waiting for job to complete. job_id={created_job.job_id}")
run_by_id = self.ws.jobs.run_now(job_id=created_job.job_id).result()
run_by_id = self.ws.jobs.run_now(job_id=created_job.job_id).result(timeout=timedelta(minutes=20))
print(f"Job run finished. run_id={run_by_id}")
return created_job

Expand Down
Loading