Skip to content

Commit c23c570

Browse files
authored
Added databricks_model_serving resource (#2054)
1 parent 179d82f commit c23c570

File tree

6 files changed

+729
-1
lines changed

6 files changed

+729
-1
lines changed

docs/resources/mlflow_model.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ $ terraform import databricks_mlflow_model.this <name>
4949
The following resources are often used in the same context:
5050

5151
* [End to end workspace management](../guides/workspace-management.md) guide.
52-
* [databricks_directory](directory.md) to manage directories in [Databricks Workpace](https://docs.databricks.com/workspace/workspace-objects.html).
52+
* [databricks_model_serving](model_serving.md) to serve this model on a Databricks serving endpoint.
53+
* [databricks_directory](directory.md) to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
5354
* [databricks_mlflow_experiment](mlflow_experiment.md) to manage [MLflow experiments](https://docs.databricks.com/data/data-sources/mlflow-experiment.html) in Databricks.
5455
* [databricks_notebook](notebook.md) to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
5556
* [databricks_notebook](../data-sources/notebook.md) data to export a notebook from Databricks Workspace.

docs/resources/model_serving.md

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
---
2+
subcategory: "Serving"
3+
---
4+
# databricks_model_serving Resource
5+
6+
This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html) endpoints in Databricks.
7+
8+
## Example Usage
9+
10+
```hcl
11+
resource "databricks_model_serving" "this" {
12+
name = "ads-serving-endpoint"
13+
config {
14+
served_models {
15+
name = "prod_model"
16+
model_name = "ads-model"
17+
model_version = "2"
18+
workload_size = "Small"
19+
scale_to_zero_enabled = true
20+
}
21+
served_models {
22+
name = "candidate_model"
23+
model_name = "ads-model"
24+
model_version = "4"
25+
workload_size = "Small"
26+
scale_to_zero_enabled = false
27+
}
28+
traffic_config {
29+
routes {
30+
served_model_name = "prod_model"
31+
traffic_percentage = 90
32+
}
33+
routes {
34+
served_model_name = "candidate_model"
35+
traffic_percentage = 10
36+
}
37+
}
38+
}
39+
}
40+
```
41+
42+
## Argument Reference
43+
44+
The following arguments are supported:
45+
46+
* `name` - (Required) The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
47+
* `config` - (Required) The model serving endpoint configuration.
48+
49+
### config Configuration Block
50+
51+
* `served_models` - (Required) Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
52+
* `traffic_config` - A single block represents the traffic split configuration amongst the served models.
53+
54+
### served_models Configuration Block
55+
56+
* `name` - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to `modelname-modelversion`. A served model name can consist of alphanumeric characters, dashes, and underscores.
57+
* `model_name` - (Required) The name of the model in Databricks Model Registry to be served.
58+
* `model_version` - (Required) The version of the model in Databricks Model Registry to be served.
59+
* `workload_size` - (Required) The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
60+
* `scale_to_zero_enabled` - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is `true`.
61+
62+
### traffic_config Configuration Block
63+
64+
* `routes` - (Required) Each block represents a route that defines traffic to each served model. Each `served_models` block needs to have a corresponding `routes` block.
65+
66+
### routes Configuration Block
67+
68+
* `served_model_name` - (Required) The name of the served model this route configures traffic for. This needs to match the name of a `served_models` block.
69+
* `traffic_percentage` - (Required) The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
70+
71+
## Timeouts
72+
73+
The `timeouts` block allows you to specify `create` and `update` timeouts. The default for both operations is 45 minutes.
74+
75+
```hcl
76+
timeouts {
77+
create = "30m"
78+
}
79+
```
80+
81+
## Import
82+
83+
The model serving resource can be imported using the name of the endpoint.
84+
85+
```bash
86+
$ terraform import databricks_model_serving.this <model-serving-endpoint-name>
87+
```
88+
89+
## Related Resources
90+
91+
The following resources are often used in the same context:
92+
93+
* [End to end workspace management](../guides/workspace-management.md) guide.
94+
* [databricks_directory](directory.md) to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
95+
* [databricks_mlflow_model](mlflow_model.md) to create [MLflow models](https://docs.databricks.com/applications/mlflow/models.html) in Databricks.
96+
* [databricks_notebook](notebook.md) to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
97+
* [databricks_notebook](../data-sources/notebook.md) data to export a notebook from Databricks Workspace.
98+
* [databricks_repo](repo.md) to manage [Databricks Repos](https://docs.databricks.com/repos.html).
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package acceptance
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"testing"
7+
8+
"github.com/databricks/databricks-sdk-go"
9+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest"
10+
"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
11+
)
12+
13+
func TestAccModelServing(t *testing.T) {
14+
name := fmt.Sprintf("terraform-test-model-serving-%[1]s",
15+
acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum))
16+
workspaceLevel(t, step{
17+
Template: fmt.Sprintf(`
18+
data "databricks_spark_version" "latest" {
19+
}
20+
resource "databricks_cluster" "this" {
21+
cluster_name = "singlenode-{var.RANDOM}"
22+
spark_version = data.databricks_spark_version.latest.id
23+
instance_pool_id = "{env.TEST_INSTANCE_POOL_ID}"
24+
num_workers = 0
25+
autotermination_minutes = 10
26+
spark_conf = {
27+
"spark.databricks.cluster.profile" = "singleNode"
28+
"spark.master" = "local[*]"
29+
}
30+
custom_tags = {
31+
"ResourceClass" = "SingleNode"
32+
}
33+
library {
34+
pypi {
35+
package = "mlflow"
36+
}
37+
}
38+
}
39+
resource "databricks_mlflow_experiment" "exp" {
40+
name = "/Shared/%[1]s-exp"
41+
}
42+
resource "databricks_mlflow_model" "model" {
43+
name = "%[1]s-model"
44+
}
45+
`, name),
46+
Check: func(s *terraform.State) error {
47+
w := databricks.Must(databricks.NewWorkspaceClient())
48+
id := s.RootModule().Resources["databricks_cluster.this"].Primary.ID
49+
w.CommandExecutor.Execute(context.Background(), id, "python", fmt.Sprintf(`
50+
import time
51+
import mlflow
52+
import mlflow.pyfunc
53+
from mlflow.tracking.artifact_utils import get_artifact_uri
54+
from mlflow.tracking.client import MlflowClient
55+
56+
mlflow.set_experiment("/Shared/%[1]s-exp")
57+
58+
class SampleModel(mlflow.pyfunc.PythonModel):
59+
def predict(self, ctx, input_df):
60+
return 7
61+
artifact_path = 'sample_model'
62+
63+
with mlflow.start_run() as new_run:
64+
mlflow.pyfunc.log_model(python_model=SampleModel(), artifact_path=artifact_path)
65+
run1_id = new_run.info.run_id
66+
source = get_artifact_uri(run_id=run1_id, artifact_path=artifact_path)
67+
68+
client = MlflowClient()
69+
client.create_model_version(name="%[1]s-model", source=source, run_id=run1_id)
70+
client.create_model_version(name="%[1]s-model", source=source, run_id=run1_id)
71+
while client.get_model_version(name="%[1]s-model", version="1").getStatus() != ModelRegistry.ModelVersionStatus.READY:
72+
time.sleep(10)
73+
while client.get_model_version(name="%[1]s-model", version="2").getStatus() != ModelRegistry.ModelVersionStatus.READY:
74+
time.sleep(10)
75+
`, name))
76+
return nil
77+
},
78+
},
79+
step{
80+
Template: fmt.Sprintf(`
81+
resource "databricks_mlflow_experiment" "exp" {
82+
name = "/Shared/%[1]s-exp"
83+
}
84+
resource "databricks_mlflow_model" "model" {
85+
name = "%[1]s-model"
86+
}
87+
resource "databricks_model_serving" "endpoint" {
88+
name = "%[1]s"
89+
config {
90+
served_models {
91+
name = "prod_model"
92+
model_name = "%[1]s-model"
93+
model_version = "1"
94+
workload_size = "Small"
95+
scale_to_zero_enabled = true
96+
}
97+
served_models {
98+
name = "candidate_model"
99+
model_name = "%[1]s-model"
100+
model_version = "2"
101+
workload_size = "Small"
102+
scale_to_zero_enabled = false
103+
}
104+
traffic_config {
105+
routes {
106+
served_model_name = "prod_model"
107+
traffic_percentage = 90
108+
}
109+
routes {
110+
served_model_name = "candidate_model"
111+
traffic_percentage = 10
112+
}
113+
}
114+
}
115+
}
116+
`, name),
117+
},
118+
step{
119+
Template: fmt.Sprintf(`
120+
resource "databricks_mlflow_experiment" "exp" {
121+
name = "/Shared/%[1]s-exp"
122+
}
123+
resource "databricks_mlflow_model" "model" {
124+
name = "%[1]s-model"
125+
}
126+
resource "databricks_model_serving" "endpoint" {
127+
name = "%[1]s"
128+
config {
129+
served_models {
130+
name = "prod_model"
131+
model_name = "%[1]s-model"
132+
model_version = "1"
133+
workload_size = "Small"
134+
scale_to_zero_enabled = true
135+
}
136+
traffic_config {
137+
routes {
138+
served_model_name = "prod_model"
139+
traffic_percentage = 100
140+
}
141+
}
142+
}
143+
}
144+
`, name),
145+
},
146+
)
147+
}

provider/provider.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/databricks/terraform-provider-databricks/repos"
3131
"github.com/databricks/terraform-provider-databricks/scim"
3232
"github.com/databricks/terraform-provider-databricks/secrets"
33+
"github.com/databricks/terraform-provider-databricks/serving"
3334
"github.com/databricks/terraform-provider-databricks/sql"
3435
"github.com/databricks/terraform-provider-databricks/storage"
3536
"github.com/databricks/terraform-provider-databricks/tokens"
@@ -109,6 +110,7 @@ func DatabricksProvider() *schema.Provider {
109110
"databricks_mlflow_experiment": mlflow.ResourceMlflowExperiment(),
110111
"databricks_mlflow_model": mlflow.ResourceMlflowModel(),
111112
"databricks_mlflow_webhook": mlflow.ResourceMlflowWebhook(),
113+
"databricks_model_serving": serving.ResourceModelServing(),
112114
"databricks_mount": storage.ResourceMount(),
113115
"databricks_mws_customer_managed_keys": mws.ResourceMwsCustomerManagedKeys(),
114116
"databricks_mws_credentials": mws.ResourceMwsCredentials(),

serving/resource_model_serving.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package serving
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/databricks/databricks-sdk-go/retries"
8+
"github.com/databricks/databricks-sdk-go/service/endpoints"
9+
"github.com/databricks/terraform-provider-databricks/common"
10+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
11+
)
12+
13+
const DefaultProvisionTimeout = 45 * time.Minute
14+
15+
func ResourceModelServing() *schema.Resource {
16+
s := common.StructToSchema(
17+
endpoints.CreateServingEndpoint{},
18+
func(m map[string]*schema.Schema) map[string]*schema.Schema {
19+
m["name"].ForceNew = true
20+
common.MustSchemaPath(m, "config", "served_models", "scale_to_zero_enabled").Required = false
21+
common.MustSchemaPath(m, "config", "served_models", "scale_to_zero_enabled").Optional = true
22+
common.MustSchemaPath(m, "config", "served_models", "scale_to_zero_enabled").Default = true
23+
common.MustSchemaPath(m, "config", "served_models", "name").Computed = true
24+
return m
25+
})
26+
27+
return common.Resource{
28+
Create: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
29+
w, err := c.WorkspaceClient()
30+
if err != nil {
31+
return err
32+
}
33+
var e endpoints.CreateServingEndpoint
34+
common.DataToStructPointer(d, s, &e)
35+
endpoint, err := w.ServingEndpoints.CreateAndWait(ctx, e, retries.Timeout[endpoints.ServingEndpointDetailed](d.Timeout(schema.TimeoutCreate)))
36+
if err != nil {
37+
return err
38+
}
39+
d.SetId(endpoint.Name)
40+
return nil
41+
},
42+
Read: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
43+
w, err := c.WorkspaceClient()
44+
if err != nil {
45+
return err
46+
}
47+
endpoint, err := w.ServingEndpoints.GetByName(ctx, d.Id())
48+
if err != nil {
49+
return err
50+
}
51+
return common.StructToData(*endpoint, s, d)
52+
},
53+
Update: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
54+
w, err := c.WorkspaceClient()
55+
if err != nil {
56+
return err
57+
}
58+
var e endpoints.CreateServingEndpoint
59+
common.DataToStructPointer(d, s, &e)
60+
e.Config.Name = e.Name
61+
_, err = w.ServingEndpoints.UpdateConfigAndWait(ctx, e.Config, retries.Timeout[endpoints.ServingEndpointDetailed](d.Timeout(schema.TimeoutUpdate)))
62+
return err
63+
},
64+
Delete: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
65+
w, err := c.WorkspaceClient()
66+
if err != nil {
67+
return err
68+
}
69+
return w.ServingEndpoints.DeleteByName(ctx, d.Id())
70+
},
71+
StateUpgraders: []schema.StateUpgrader{},
72+
Schema: s,
73+
SchemaVersion: 0,
74+
Timeouts: &schema.ResourceTimeout{
75+
Create: schema.DefaultTimeout(DefaultProvisionTimeout),
76+
Update: schema.DefaultTimeout(DefaultProvisionTimeout),
77+
},
78+
}.ToResource()
79+
}

0 commit comments

Comments
 (0)