Skip to content

Commit c23c570

Browse files
authored
Added databricks_model_serving resource (#2054)
1 parent 179d82f commit c23c570

File tree

6 files changed

+729
-1
lines changed

6 files changed

+729
-1
lines changed

docs/resources/mlflow_model.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ $ terraform import databricks_mlflow_model.this <name>
4949
The following resources are often used in the same context:
5050

5151
* [End to end workspace management](../guides/workspace-management.md) guide.
52-
* [databricks_directory](directory.md) to manage directories in [Databricks Workpace](https://docs.databricks.com/workspace/workspace-objects.html).
52+
* [databricks_model_serving](model_serving.md) to serve this model on a Databricks serving endpoint.
53+
* [databricks_directory](directory.md) to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
5354
* [databricks_mlflow_experiment](mlflow_experiment.md) to manage [MLflow experiments](https://docs.databricks.com/data/data-sources/mlflow-experiment.html) in Databricks.
5455
* [databricks_notebook](notebook.md) to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
5556
* [databricks_notebook](../data-sources/notebook.md) data to export a notebook from Databricks Workspace.

docs/resources/model_serving.md

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
---
2+
subcategory: "Serving"
3+
---
4+
# databricks_model_serving Resource
5+
6+
This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html) endpoints in Databricks.
7+
8+
## Example Usage
9+
10+
```hcl
11+
resource "databricks_model_serving" "this" {
12+
name = "ads-serving-endpoint"
13+
config {
14+
served_models {
15+
name = "prod_model"
16+
model_name = "ads-model"
17+
model_version = "2"
18+
workload_size = "Small"
19+
scale_to_zero_enabled = true
20+
}
21+
served_models {
22+
name = "candidate_model"
23+
model_name = "ads-model"
24+
model_version = "4"
25+
workload_size = "Small"
26+
scale_to_zero_enabled = false
27+
}
28+
traffic_config {
29+
routes {
30+
served_model_name = "prod_model"
31+
traffic_percentage = 90
32+
}
33+
routes {
34+
served_model_name = "candidate_model"
35+
traffic_percentage = 10
36+
}
37+
}
38+
}
39+
}
40+
```
41+
42+
## Argument Reference
43+
44+
The following arguments are supported:
45+
46+
* `name` - (Required) The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
47+
* `config` - (Required) The model serving endpoint configuration.
48+
49+
### config Configuration Block
50+
51+
* `served_models` - (Required) Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
52+
* `traffic_config` - A single block represents the traffic split configuration amongst the served models.
53+
54+
### served_models Configuration Block
55+
56+
* `name` - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to `modelname-modelversion`. A served model name can consist of alphanumeric characters, dashes, and underscores.
57+
* `model_name` - (Required) The name of the model in Databricks Model Registry to be served.
58+
* `model_version` - (Required) The version of the model in Databricks Model Registry to be served.
59+
* `workload_size` - (Required) The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
60+
* `scale_to_zero_enabled` - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is `true`.
61+
62+
### traffic_config Configuration Block
63+
64+
* `routes` - (Required) Each block represents a route that defines traffic to each served model. Each `served_models` block needs to have a corresponding `routes` block.
65+
66+
### routes Configuration Block
67+
68+
* `served_model_name` - (Required) The name of the served model this route configures traffic for. This needs to match the name of a `served_models` block.
69+
* `traffic_percentage` - (Required) The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
70+
71+
## Timeouts
72+
73+
The `timeouts` block allows you to specify `create` and `update` timeouts. The default for both operations is 45 minutes.
74+
75+
```hcl
76+
timeouts {
77+
create = "30m"
78+
}
79+
```
80+
81+
## Import
82+
83+
The model serving resource can be imported using the name of the endpoint.
84+
85+
```bash
86+
$ terraform import databricks_model_serving.this <model-serving-endpoint-name>
87+
```
88+
89+
## Related Resources
90+
91+
The following resources are often used in the same context:
92+
93+
* [End to end workspace management](../guides/workspace-management.md) guide.
94+
* [databricks_directory](directory.md) to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
95+
* [databricks_mlflow_model](mlflow_model.md) to create [MLflow models](https://docs.databricks.com/applications/mlflow/models.html) in Databricks.
96+
* [databricks_notebook](notebook.md) to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
97+
* [databricks_notebook](../data-sources/notebook.md) data to export a notebook from Databricks Workspace.
98+
* [databricks_repo](repo.md) to manage [Databricks Repos](https://docs.databricks.com/repos.html).
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package acceptance
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"testing"
7+
8+
"github.com/databricks/databricks-sdk-go"
9+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest"
10+
"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
11+
)
12+
13+
func TestAccModelServing(t *testing.T) {
14+
name := fmt.Sprintf("terraform-test-model-serving-%[1]s",
15+
acctest.RandStringFromCharSet(5, acctest.CharSetAlphaNum))
16+
workspaceLevel(t, step{
17+
Template: fmt.Sprintf(`
18+
data "databricks_spark_version" "latest" {
19+
}
20+
resource "databricks_cluster" "this" {
21+
cluster_name = "singlenode-{var.RANDOM}"
22+
spark_version = data.databricks_spark_version.latest.id
23+
instance_pool_id = "{env.TEST_INSTANCE_POOL_ID}"
24+
num_workers = 0
25+
autotermination_minutes = 10
26+
spark_conf = {
27+
"spark.databricks.cluster.profile" = "singleNode"
28+
"spark.master" = "local[*]"
29+
}
30+
custom_tags = {
31+
"ResourceClass" = "SingleNode"
32+
}
33+
library {
34+
pypi {
35+
package = "mlflow"
36+
}
37+
}
38+
}
39+
resource "databricks_mlflow_experiment" "exp" {
40+
name = "/Shared/%[1]s-exp"
41+
}
42+
resource "databricks_mlflow_model" "model" {
43+
name = "%[1]s-model"
44+
}
45+
`, name),
46+
Check: func(s *terraform.State) error {
47+
w := databricks.Must(databricks.NewWorkspaceClient())
48+
id := s.RootModule().Resources["databricks_cluster.this"].Primary.ID
49+
w.CommandExecutor.Execute(context.Background(), id, "python", fmt.Sprintf(`
50+
import time
51+
import mlflow
52+
import mlflow.pyfunc
53+
from mlflow.tracking.artifact_utils import get_artifact_uri
54+
from mlflow.tracking.client import MlflowClient
55+
56+
mlflow.set_experiment("/Shared/%[1]s-exp")
57+
58+
class SampleModel(mlflow.pyfunc.PythonModel):
59+
def predict(self, ctx, input_df):
60+
return 7
61+
artifact_path = 'sample_model'
62+
63+
with mlflow.start_run() as new_run:
64+
mlflow.pyfunc.log_model(python_model=SampleModel(), artifact_path=artifact_path)
65+
run1_id = new_run.info.run_id
66+
source = get_artifact_uri(run_id=run1_id, artifact_path=artifact_path)
67+
68+
client = MlflowClient()
69+
client.create_model_version(name="%[1]s-model", source=source, run_id=run1_id)
70+
client.create_model_version(name="%[1]s-model", source=source, run_id=run1_id)
71+
while client.get_model_version(name="%[1]s-model", version="1").getStatus() != ModelRegistry.ModelVersionStatus.READY:
72+
time.sleep(10)
73+
while client.get_model_version(name="%[1]s-model", version="2").getStatus() != ModelRegistry.ModelVersionStatus.READY:
74+
time.sleep(10)
75+
`, name))
76+
return nil
77+
},
78+
},
79+
step{
80+
Template: fmt.Sprintf(`
81+
resource "databricks_mlflow_experiment" "exp" {
82+
name = "/Shared/%[1]s-exp"
83+
}
84+
resource "databricks_mlflow_model" "model" {
85+
name = "%[1]s-model"
86+
}
87+
resource "databricks_model_serving" "endpoint" {
88+
name = "%[1]s"
89+
config {
90+
served_models {
91+
name = "prod_model"
92+
model_name = "%[1]s-model"
93+
model_version = "1"
94+
workload_size = "Small"
95+
scale_to_zero_enabled = true
96+
}
97+
served_models {
98+
name = "candidate_model"
99+
model_name = "%[1]s-model"
100+
model_version = "2"
101+
workload_size = "Small"
102+
scale_to_zero_enabled = false
103+
}
104+
traffic_config {
105+
routes {
106+
served_model_name = "prod_model"
107+
traffic_percentage = 90
108+
}
109+
routes {
110+
served_model_name = "candidate_model"
111+
traffic_percentage = 10
112+
}
113+
}
114+
}
115+
}
116+
`, name),
117+
},
118+
step{
119+
Template: fmt.Sprintf(`
120+
resource "databricks_mlflow_experiment" "exp" {
121+
name = "/Shared/%[1]s-exp"
122+
}
123+
resource "databricks_mlflow_model" "model" {
124+
name = "%[1]s-model"
125+
}
126+
resource "databricks_model_serving" "endpoint" {
127+
name = "%[1]s"
128+
config {
129+
served_models {
130+
name = "prod_model"
131+
model_name = "%[1]s-model"
132+
model_version = "1"
133+
workload_size = "Small"
134+
scale_to_zero_enabled = true
135+
}
136+
traffic_config {
137+
routes {
138+
served_model_name = "prod_model"
139+
traffic_percentage = 100
140+
}
141+
}
142+
}
143+
}
144+
`, name),
145+
},
146+
)
147+
}

provider/provider.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/databricks/terraform-provider-databricks/repos"
3131
"github.com/databricks/terraform-provider-databricks/scim"
3232
"github.com/databricks/terraform-provider-databricks/secrets"
33+
"github.com/databricks/terraform-provider-databricks/serving"
3334
"github.com/databricks/terraform-provider-databricks/sql"
3435
"github.com/databricks/terraform-provider-databricks/storage"
3536
"github.com/databricks/terraform-provider-databricks/tokens"
@@ -109,6 +110,7 @@ func DatabricksProvider() *schema.Provider {
109110
"databricks_mlflow_experiment": mlflow.ResourceMlflowExperiment(),
110111
"databricks_mlflow_model": mlflow.ResourceMlflowModel(),
111112
"databricks_mlflow_webhook": mlflow.ResourceMlflowWebhook(),
113+
"databricks_model_serving": serving.ResourceModelServing(),
112114
"databricks_mount": storage.ResourceMount(),
113115
"databricks_mws_customer_managed_keys": mws.ResourceMwsCustomerManagedKeys(),
114116
"databricks_mws_credentials": mws.ResourceMwsCredentials(),

serving/resource_model_serving.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package serving
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/databricks/databricks-sdk-go/retries"
8+
"github.com/databricks/databricks-sdk-go/service/endpoints"
9+
"github.com/databricks/terraform-provider-databricks/common"
10+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
11+
)
12+
13+
const DefaultProvisionTimeout = 45 * time.Minute
14+
15+
func ResourceModelServing() *schema.Resource {
16+
s := common.StructToSchema(
17+
endpoints.CreateServingEndpoint{},
18+
func(m map[string]*schema.Schema) map[string]*schema.Schema {
19+
m["name"].ForceNew = true
20+
common.MustSchemaPath(m, "config", "served_models", "scale_to_zero_enabled").Required = false
21+
common.MustSchemaPath(m, "config", "served_models", "scale_to_zero_enabled").Optional = true
22+
common.MustSchemaPath(m, "config", "served_models", "scale_to_zero_enabled").Default = true
23+
common.MustSchemaPath(m, "config", "served_models", "name").Computed = true
24+
return m
25+
})
26+
27+
return common.Resource{
28+
Create: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
29+
w, err := c.WorkspaceClient()
30+
if err != nil {
31+
return err
32+
}
33+
var e endpoints.CreateServingEndpoint
34+
common.DataToStructPointer(d, s, &e)
35+
endpoint, err := w.ServingEndpoints.CreateAndWait(ctx, e, retries.Timeout[endpoints.ServingEndpointDetailed](d.Timeout(schema.TimeoutCreate)))
36+
if err != nil {
37+
return err
38+
}
39+
d.SetId(endpoint.Name)
40+
return nil
41+
},
42+
Read: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
43+
w, err := c.WorkspaceClient()
44+
if err != nil {
45+
return err
46+
}
47+
endpoint, err := w.ServingEndpoints.GetByName(ctx, d.Id())
48+
if err != nil {
49+
return err
50+
}
51+
return common.StructToData(*endpoint, s, d)
52+
},
53+
Update: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
54+
w, err := c.WorkspaceClient()
55+
if err != nil {
56+
return err
57+
}
58+
var e endpoints.CreateServingEndpoint
59+
common.DataToStructPointer(d, s, &e)
60+
e.Config.Name = e.Name
61+
_, err = w.ServingEndpoints.UpdateConfigAndWait(ctx, e.Config, retries.Timeout[endpoints.ServingEndpointDetailed](d.Timeout(schema.TimeoutUpdate)))
62+
return err
63+
},
64+
Delete: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
65+
w, err := c.WorkspaceClient()
66+
if err != nil {
67+
return err
68+
}
69+
return w.ServingEndpoints.DeleteByName(ctx, d.Id())
70+
},
71+
StateUpgraders: []schema.StateUpgrader{},
72+
Schema: s,
73+
SchemaVersion: 0,
74+
Timeouts: &schema.ResourceTimeout{
75+
Create: schema.DefaultTimeout(DefaultProvisionTimeout),
76+
Update: schema.DefaultTimeout(DefaultProvisionTimeout),
77+
},
78+
}.ToResource()
79+
}

0 commit comments

Comments
 (0)