diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index baefa0efc..a7b80d538 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -bdd8536d26484460f450b1d17722c01c5a6a50a9 \ No newline at end of file +cd641c9dd4febe334b339dd7878d099dcf0eeab5 \ No newline at end of file diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index a835be1fa..7ac3c892b 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -11,3 +11,7 @@ ### Internal Changes ### API Changes +* Added [w.forecasting](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/ml/forecasting.html) workspace-level service. +* Added `statement_id` field for `databricks.sdk.service.dashboards.GenieQueryAttachment`. +* Added `could_not_get_model_deployments_exception` enum value for `databricks.sdk.service.dashboards.MessageErrorType`. +* [Breaking] Removed `jwks_uri` field for `databricks.sdk.service.oauth2.OidcFederationPolicy`. diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 141e7e44d..48f59d48a 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -63,7 +63,8 @@ ProviderExchangeFiltersAPI, ProviderExchangesAPI, ProviderFilesAPI, ProviderListingsAPI, ProviderPersonalizationRequestsAPI, ProviderProviderAnalyticsDashboardsAPI, ProviderProvidersAPI) -from databricks.sdk.service.ml import ExperimentsAPI, ModelRegistryAPI +from databricks.sdk.service.ml import (ExperimentsAPI, ForecastingAPI, + ModelRegistryAPI) from databricks.sdk.service.oauth2 import (AccountFederationPolicyAPI, CustomAppIntegrationAPI, OAuthPublishedAppsAPI, @@ -305,6 +306,7 @@ def __init__( self._workspace = WorkspaceExt(self._api_client) self._workspace_bindings = service.catalog.WorkspaceBindingsAPI(self._api_client) self._workspace_conf = service.settings.WorkspaceConfAPI(self._api_client) + self._forecasting = service.ml.ForecastingAPI(self._api_client) @property def config(self) -> client.Config: @@ -808,6 +810,11 @@ def workspace_conf(self) -> 
service.settings.WorkspaceConfAPI: """This API allows updating known workspace settings for advanced users.""" return self._workspace_conf + @property + def forecasting(self) -> service.ml.ForecastingAPI: + """The Forecasting API allows you to create and get serverless forecasting experiments.""" + return self._forecasting + def get_workspace_id(self) -> int: """Get the workspace ID of the workspace that this client is connected to.""" response = self._api_client.do("GET", "/api/2.0/preview/scim/v2/Me", response_headers=["X-Databricks-Org-Id"]) diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index 1a46811bf..3810d631c 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -3846,6 +3846,10 @@ def from_dict(cls, d: Dict[str, Any]) -> DestroyResponse: @dataclass class DiskSpec: + """Describes the disks that are launched for each instance in the spark cluster. For example, if + the cluster has 3 instances, each instance is configured to launch 2 disks, 100 GiB each, then + Databricks will launch a total of 6 disks, 100 GiB each, for this cluster.""" + disk_count: Optional[int] = None """The number of disks launched for each instance: - This feature is only enabled for supported node types. - Users can choose up to the limit of the disks supported by the node type. - For @@ -3920,9 +3924,15 @@ def from_dict(cls, d: Dict[str, Any]) -> DiskSpec: @dataclass class DiskType: + """Describes the disk type.""" + azure_disk_volume_type: Optional[DiskTypeAzureDiskVolumeType] = None + """All Azure Disk types that Databricks supports. See + https://docs.microsoft.com/en-us/azure/storage/storage-about-disks-and-vhds-linux#types-of-disks""" ebs_volume_type: Optional[DiskTypeEbsVolumeType] = None + """All EBS volume types that Databricks supports. 
See https://aws.amazon.com/ebs/details/ for + details.""" def as_dict(self) -> dict: """Serializes the DiskType into a dictionary suitable for use as a JSON request body.""" @@ -3952,12 +3962,16 @@ def from_dict(cls, d: Dict[str, Any]) -> DiskType: class DiskTypeAzureDiskVolumeType(Enum): + """All Azure Disk types that Databricks supports. See + https://docs.microsoft.com/en-us/azure/storage/storage-about-disks-and-vhds-linux#types-of-disks""" PREMIUM_LRS = "PREMIUM_LRS" STANDARD_LRS = "STANDARD_LRS" class DiskTypeEbsVolumeType(Enum): + """All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for + details.""" GENERAL_PURPOSE_SSD = "GENERAL_PURPOSE_SSD" THROUGHPUT_OPTIMIZED_HDD = "THROUGHPUT_OPTIMIZED_HDD" @@ -3998,6 +4012,7 @@ def from_dict(cls, d: Dict[str, Any]) -> DockerBasicAuth: @dataclass class DockerImage: basic_auth: Optional[DockerBasicAuth] = None + """Basic auth with username and password""" url: Optional[str] = None """URL of the docker image.""" @@ -5334,7 +5349,7 @@ class GetInstancePool: - Currently, Databricks allows at most 45 custom tags""" default_tags: Optional[Dict[str, str]] = None - """Tags that are added by Databricks regardless of any `custom_tags`, including: + """Tags that are added by Databricks regardless of any ``custom_tags``, including: - Vendor: Databricks @@ -6250,7 +6265,7 @@ class InstancePoolAndStats: - Currently, Databricks allows at most 45 custom tags""" default_tags: Optional[Dict[str, str]] = None - """Tags that are added by Databricks regardless of any `custom_tags`, including: + """Tags that are added by Databricks regardless of any ``custom_tags``, including: - Vendor: Databricks @@ -6427,10 +6442,10 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolAndStats: @dataclass class InstancePoolAwsAttributes: + """Attributes set during instance pool creation which are related to Amazon Web Services.""" + availability: Optional[InstancePoolAwsAttributesAvailability] = None - 
"""Availability type used for the spot nodes. - - The default value is defined by InstancePoolConf.instancePoolDefaultAwsAvailability""" + """Availability type used for the spot nodes.""" spot_bid_price_percent: Optional[int] = None """Calculates the bid price for AWS spot instances, as a percentage of the corresponding instance @@ -6439,10 +6454,7 @@ class InstancePoolAwsAttributes: instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field - will be considered. Note that, for safety, we enforce this field to be no more than 10000. - - The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.""" + will be considered. Note that, for safety, we enforce this field to be no more than 10000.""" zone_id: Optional[str] = None """Identifier for the availability zone/datacenter in which the cluster resides. This string will @@ -6485,9 +6497,7 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolAwsAttributes: class InstancePoolAwsAttributesAvailability(Enum): - """Availability type used for the spot nodes. - - The default value is defined by InstancePoolConf.instancePoolDefaultAwsAvailability""" + """The set of AWS availability types supported when setting up nodes for a cluster.""" ON_DEMAND = "ON_DEMAND" SPOT = "SPOT" @@ -6495,14 +6505,16 @@ class InstancePoolAwsAttributesAvailability(Enum): @dataclass class InstancePoolAzureAttributes: + """Attributes set during instance pool creation which are related to Azure.""" + availability: Optional[InstancePoolAzureAttributesAvailability] = None - """Shows the Availability type used for the spot nodes. 
- - The default value is defined by InstancePoolConf.instancePoolDefaultAzureAvailability""" + """Availability type used for the spot nodes.""" spot_bid_max_price: Optional[float] = None - """The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidMaxPrice.""" + """With variable pricing, you have the option to set a max price, in US dollars (USD). For example, the + value 2 would be a max price of $2.00 USD per hour. If you set the max price to be -1, the VM + won't be evicted based on price. The price for the VM will be the current price for spot or the + price for a standard VM, whichever is less, as long as there is capacity and quota available.""" def as_dict(self) -> dict: """Serializes the InstancePoolAzureAttributes into a dictionary suitable for use as a JSON request body.""" @@ -6532,9 +6544,7 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolAzureAttributes: class InstancePoolAzureAttributesAvailability(Enum): - """Shows the Availability type used for the spot nodes. - - The default value is defined by InstancePoolConf.instancePoolDefaultAzureAvailability""" + """The set of Azure availability types supported when setting up nodes for a cluster.""" ON_DEMAND_AZURE = "ON_DEMAND_AZURE" SPOT_AZURE = "SPOT_AZURE" @@ -6542,6 +6552,8 @@ class InstancePoolAzureAttributesAvailability(Enum): @dataclass class InstancePoolGcpAttributes: + """Attributes set during instance pool creation which are related to GCP.""" + gcp_availability: Optional[GcpAvailability] = None """This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.""" @@ -6756,7 +6768,10 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolPermissionsRequest: class InstancePoolState(Enum): - """Current state of the instance pool.""" + """The state of a Cluster.
The current allowable state transitions are as follows: + + - ``ACTIVE`` -> ``STOPPED`` - ``ACTIVE`` -> ``DELETED`` - ``STOPPED`` -> ``ACTIVE`` - + ``STOPPED`` -> ``DELETED``""" ACTIVE = "ACTIVE" DELETED = "DELETED" @@ -7865,6 +7880,8 @@ def from_dict(cls, d: Dict[str, Any]) -> NodeType: @dataclass class PendingInstanceError: + """Error message of a failed pending instance.""" + instance_id: Optional[str] = None message: Optional[str] = None diff --git a/databricks/sdk/service/dashboards.py b/databricks/sdk/service/dashboards.py index b1e915640..be50febad 100755 --- a/databricks/sdk/service/dashboards.py +++ b/databricks/sdk/service/dashboards.py @@ -594,12 +594,15 @@ class GenieMessage: `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `FAILED`: The response - generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is - completed. Results are in the `attachments` field. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `SUBMITTED`: Message has - been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to - rerun the query. * `CANCELLED`: Message has been cancelled.""" + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: + Message processing is completed. Results are in the `attachments` field. Get the SQL query + result by calling + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available + anymore.
The user needs to rerun the query. Rerun the SQL query result by calling + [executeMessageAttachmentQuery](:method:genie/executeMessageAttachmentQuery) API. * `CANCELLED`: + Message has been cancelled.""" user_id: Optional[int] = None """ID of the user who created the message""" @@ -697,6 +700,10 @@ class GenieQueryAttachment: query_result_metadata: Optional[GenieResultMetadata] = None """Metadata associated with the query result.""" + statement_id: Optional[str] = None + """Statement Execution API statement id. Use [Get status, manifest, and result first + chunk](:method:statementexecution/getstatement) to get the full result data.""" + title: Optional[str] = None """Name of the query""" @@ -713,6 +720,8 @@ def as_dict(self) -> dict: body["query"] = self.query if self.query_result_metadata: body["query_result_metadata"] = self.query_result_metadata.as_dict() + if self.statement_id is not None: + body["statement_id"] = self.statement_id if self.title is not None: body["title"] = self.title return body @@ -730,6 +739,8 @@ def as_shallow_dict(self) -> dict: body["query"] = self.query if self.query_result_metadata: body["query_result_metadata"] = self.query_result_metadata + if self.statement_id is not None: + body["statement_id"] = self.statement_id if self.title is not None: body["title"] = self.title return body @@ -743,6 +754,7 @@ def from_dict(cls, d: Dict[str, Any]) -> GenieQueryAttachment: last_updated_timestamp=d.get("last_updated_timestamp", None), query=d.get("query", None), query_result_metadata=_from_dict(d, "query_result_metadata", GenieResultMetadata), + statement_id=d.get("statement_id", None), title=d.get("title", None), ) @@ -1062,6 +1074,7 @@ class MessageErrorType(Enum): CHAT_COMPLETION_NETWORK_EXCEPTION = "CHAT_COMPLETION_NETWORK_EXCEPTION" CONTENT_FILTER_EXCEPTION = "CONTENT_FILTER_EXCEPTION" CONTEXT_EXCEEDED_EXCEPTION = "CONTEXT_EXCEEDED_EXCEPTION" + COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION = "COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION" 
COULD_NOT_GET_UC_SCHEMA_EXCEPTION = "COULD_NOT_GET_UC_SCHEMA_EXCEPTION" DEPLOYMENT_NOT_FOUND_EXCEPTION = "DEPLOYMENT_NOT_FOUND_EXCEPTION" FUNCTIONS_NOT_AVAILABLE_EXCEPTION = "FUNCTIONS_NOT_AVAILABLE_EXCEPTION" @@ -1107,12 +1120,15 @@ class MessageStatus(Enum): `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `FAILED`: The response - generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is - completed. Results are in the `attachments` field. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `SUBMITTED`: Message has - been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to - rerun the query. * `CANCELLED`: Message has been cancelled.""" + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: + Message processing is completed. Results are in the `attachments` field. Get the SQL query + result by calling + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available + anymore. The user needs to rerun the query. Rerun the SQL query result by calling + [executeMessageAttachmentQuery](:method:genie/executeMessageAttachmentQuery) API. * `CANCELLED`: + Message has been cancelled.""" ASKING_AI = "ASKING_AI" CANCELLED = "CANCELLED" @@ -1917,7 +1933,8 @@ def execute_message_attachment_query( ) -> GenieGetMessageQueryResultResponse: """Execute message attachment SQL query. - Execute the SQL for a message query attachment. 
+ Execute the SQL for a message query attachment. Use this API when the query attachment has expired and + needs to be re-executed. :param space_id: str Genie space ID @@ -1945,7 +1962,7 @@ def execute_message_attachment_query( def execute_message_query( self, space_id: str, conversation_id: str, message_id: str ) -> GenieGetMessageQueryResultResponse: - """Execute SQL query in a conversation message. + """[Deprecated] Execute SQL query in a conversation message. Execute the SQL query in the message. @@ -2059,7 +2076,7 @@ def get_message_query_result( def get_message_query_result_by_attachment( self, space_id: str, conversation_id: str, message_id: str, attachment_id: str ) -> GenieGetMessageQueryResultResponse: - """[deprecated] Get conversation message SQL query result. + """[Deprecated] Get conversation message SQL query result. Get the result of SQL query if the message has a query attachment. This is only available if a message has a query attachment and the message status is `EXECUTING_QUERY` OR `COMPLETED`. @@ -2088,9 +2105,9 @@ def get_message_query_result_by_attachment( return GenieGetMessageQueryResultResponse.from_dict(res) def get_space(self, space_id: str) -> GenieSpace: - """Get details of a Genie Space. + """Get Genie Space. - Get a Genie Space. + Get details of a Genie Space. 
:param space_id: str The ID associated with the Genie space diff --git a/databricks/sdk/service/files.py b/databricks/sdk/service/files.py index 394aa8697..8d60b842f 100755 --- a/databricks/sdk/service/files.py +++ b/databricks/sdk/service/files.py @@ -314,12 +314,14 @@ def from_dict(cls, d: Dict[str, Any]) -> DirectoryEntry: @dataclass class DownloadResponse: content_length: Optional[int] = None + """The length of the HTTP response body in bytes.""" content_type: Optional[str] = None contents: Optional[BinaryIO] = None last_modified: Optional[str] = None + """The last modified time of the file in HTTP-date (RFC 7231) format.""" def as_dict(self) -> dict: """Serializes the DownloadResponse into a dictionary suitable for use as a JSON request body.""" @@ -430,10 +432,12 @@ def from_dict(cls, d: Dict[str, Any]) -> GetDirectoryMetadataResponse: @dataclass class GetMetadataResponse: content_length: Optional[int] = None + """The length of the HTTP response body in bytes.""" content_type: Optional[str] = None last_modified: Optional[str] = None + """The last modified time of the file in HTTP-date (RFC 7231) format.""" def as_dict(self) -> dict: """Serializes the GetMetadataResponse into a dictionary suitable for use as a JSON request body.""" diff --git a/databricks/sdk/service/ml.py b/databricks/sdk/service/ml.py index 61a7b1bc7..b978b45c6 100755 --- a/databricks/sdk/service/ml.py +++ b/databricks/sdk/service/ml.py @@ -3,11 +3,15 @@ from __future__ import annotations import logging +import random +import time from dataclasses import dataclass +from datetime import timedelta from enum import Enum -from typing import Any, Dict, Iterator, List, Optional +from typing import Any, Callable, Dict, Iterator, List, Optional -from ._internal import _enum, _from_dict, _repeated_dict, _repeated_enum +from ..errors import OperationFailed +from ._internal import Wait, _enum, _from_dict, _repeated_dict, _repeated_enum _LOG = logging.getLogger("databricks.sdk") @@ -482,6 +486,197 @@ 
def from_dict(cls, d: Dict[str, Any]) -> CreateExperimentResponse: return cls(experiment_id=d.get("experiment_id", None)) +@dataclass +class CreateForecastingExperimentRequest: + train_data_path: str + """The three-level (fully qualified) name of a unity catalog table. This table serves as the + training data for the forecasting model.""" + + target_column: str + """Name of the column in the input training table that serves as the prediction target. The values + in this column will be used as the ground truth for model training.""" + + time_column: str + """Name of the column in the input training table that represents the timestamp of each row.""" + + data_granularity_unit: str + """The time unit of the input data granularity. Together with data_granularity_quantity field, this + defines the time interval between consecutive rows in the time series data. Possible values: * + 'W' (weeks) * 'D' / 'days' / 'day' * 'hours' / 'hour' / 'hr' / 'h' * 'm' / 'minute' / 'min' / + 'minutes' / 'T' * 'S' / 'seconds' / 'sec' / 'second' * 'M' / 'month' / 'months' * 'Q' / + 'quarter' / 'quarters' * 'Y' / 'year' / 'years'""" + + forecast_horizon: int + """The number of time steps into the future for which predictions should be made. This value + represents a multiple of data_granularity_unit and data_granularity_quantity determining how far + ahead the model will forecast.""" + + custom_weights_column: Optional[str] = None + """Name of the column in the input training table used to customize the weight for each time series + to calculate weighted metrics.""" + + data_granularity_quantity: Optional[int] = None + """The quantity of the input data granularity. Together with data_granularity_unit field, this + defines the time interval between consecutive rows in the time series data. For now, only 1 + second, 1/5/10/15/30 minutes, 1 hour, 1 day, 1 week, 1 month, 1 quarter, 1 year are supported.""" + + experiment_path: Optional[str] = None + """The path to the created experiment. 
This is the path where the experiment will be stored in the + workspace.""" + + holiday_regions: Optional[List[str]] = None + """Region code(s) to consider when automatically adding holiday features. When empty, no holiday + features are added. Only supports 1 holiday region for now.""" + + max_runtime: Optional[int] = None + """The maximum duration in minutes for which the experiment is allowed to run. If the experiment + exceeds this time limit it will be stopped automatically.""" + + prediction_data_path: Optional[str] = None + """The three-level (fully qualified) path to a unity catalog table. This table path serves to store + the predictions.""" + + primary_metric: Optional[str] = None + """The evaluation metric used to optimize the forecasting model.""" + + register_to: Optional[str] = None + """The three-level (fully qualified) path to a unity catalog model. This model path serves to store + the best model.""" + + split_column: Optional[str] = None + """Name of the column in the input training table used for custom data splits. The values in this + column must be "train", "validate", or "test" to indicate which split each row belongs to.""" + + timeseries_identifier_columns: Optional[List[str]] = None + """Name of the column in the input training table used to group the dataset to predict individual + time series""" + + training_frameworks: Optional[List[str]] = None + """The list of frameworks to include for model tuning. Possible values: 'Prophet', 'ARIMA', + 'DeepAR'. 
An empty list will include all supported frameworks.""" + + def as_dict(self) -> dict: + """Serializes the CreateForecastingExperimentRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.custom_weights_column is not None: + body["custom_weights_column"] = self.custom_weights_column + if self.data_granularity_quantity is not None: + body["data_granularity_quantity"] = self.data_granularity_quantity + if self.data_granularity_unit is not None: + body["data_granularity_unit"] = self.data_granularity_unit + if self.experiment_path is not None: + body["experiment_path"] = self.experiment_path + if self.forecast_horizon is not None: + body["forecast_horizon"] = self.forecast_horizon + if self.holiday_regions: + body["holiday_regions"] = [v for v in self.holiday_regions] + if self.max_runtime is not None: + body["max_runtime"] = self.max_runtime + if self.prediction_data_path is not None: + body["prediction_data_path"] = self.prediction_data_path + if self.primary_metric is not None: + body["primary_metric"] = self.primary_metric + if self.register_to is not None: + body["register_to"] = self.register_to + if self.split_column is not None: + body["split_column"] = self.split_column + if self.target_column is not None: + body["target_column"] = self.target_column + if self.time_column is not None: + body["time_column"] = self.time_column + if self.timeseries_identifier_columns: + body["timeseries_identifier_columns"] = [v for v in self.timeseries_identifier_columns] + if self.train_data_path is not None: + body["train_data_path"] = self.train_data_path + if self.training_frameworks: + body["training_frameworks"] = [v for v in self.training_frameworks] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the CreateForecastingExperimentRequest into a shallow dictionary of its immediate attributes.""" + body = {} + if self.custom_weights_column is not None: + body["custom_weights_column"] = self.custom_weights_column + if 
self.data_granularity_quantity is not None: + body["data_granularity_quantity"] = self.data_granularity_quantity + if self.data_granularity_unit is not None: + body["data_granularity_unit"] = self.data_granularity_unit + if self.experiment_path is not None: + body["experiment_path"] = self.experiment_path + if self.forecast_horizon is not None: + body["forecast_horizon"] = self.forecast_horizon + if self.holiday_regions: + body["holiday_regions"] = self.holiday_regions + if self.max_runtime is not None: + body["max_runtime"] = self.max_runtime + if self.prediction_data_path is not None: + body["prediction_data_path"] = self.prediction_data_path + if self.primary_metric is not None: + body["primary_metric"] = self.primary_metric + if self.register_to is not None: + body["register_to"] = self.register_to + if self.split_column is not None: + body["split_column"] = self.split_column + if self.target_column is not None: + body["target_column"] = self.target_column + if self.time_column is not None: + body["time_column"] = self.time_column + if self.timeseries_identifier_columns: + body["timeseries_identifier_columns"] = self.timeseries_identifier_columns + if self.train_data_path is not None: + body["train_data_path"] = self.train_data_path + if self.training_frameworks: + body["training_frameworks"] = self.training_frameworks + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> CreateForecastingExperimentRequest: + """Deserializes the CreateForecastingExperimentRequest from a dictionary.""" + return cls( + custom_weights_column=d.get("custom_weights_column", None), + data_granularity_quantity=d.get("data_granularity_quantity", None), + data_granularity_unit=d.get("data_granularity_unit", None), + experiment_path=d.get("experiment_path", None), + forecast_horizon=d.get("forecast_horizon", None), + holiday_regions=d.get("holiday_regions", None), + max_runtime=d.get("max_runtime", None), + prediction_data_path=d.get("prediction_data_path", None), + 
primary_metric=d.get("primary_metric", None), + register_to=d.get("register_to", None), + split_column=d.get("split_column", None), + target_column=d.get("target_column", None), + time_column=d.get("time_column", None), + timeseries_identifier_columns=d.get("timeseries_identifier_columns", None), + train_data_path=d.get("train_data_path", None), + training_frameworks=d.get("training_frameworks", None), + ) + + +@dataclass +class CreateForecastingExperimentResponse: + experiment_id: Optional[str] = None + """The unique ID of the created forecasting experiment""" + + def as_dict(self) -> dict: + """Serializes the CreateForecastingExperimentResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + return body + + def as_shallow_dict(self) -> dict: + """Serializes the CreateForecastingExperimentResponse into a shallow dictionary of its immediate attributes.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> CreateForecastingExperimentResponse: + """Deserializes the CreateForecastingExperimentResponse from a dictionary.""" + return cls(experiment_id=d.get("experiment_id", None)) + + @dataclass class CreateModelRequest: name: str @@ -1800,6 +1995,60 @@ def from_dict(cls, d: Dict[str, Any]) -> FileInfo: return cls(file_size=d.get("file_size", None), is_dir=d.get("is_dir", None), path=d.get("path", None)) +@dataclass +class ForecastingExperiment: + """Represents a forecasting experiment with its unique identifier, URL, and state.""" + + experiment_id: Optional[str] = None + """The unique ID for the forecasting experiment.""" + + experiment_page_url: Optional[str] = None + """The URL to the forecasting experiment page.""" + + state: Optional[ForecastingExperimentState] = None + """The current state of the forecasting experiment.""" + + def 
as_dict(self) -> dict: + """Serializes the ForecastingExperiment into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + if self.experiment_page_url is not None: + body["experiment_page_url"] = self.experiment_page_url + if self.state is not None: + body["state"] = self.state.value + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ForecastingExperiment into a shallow dictionary of its immediate attributes.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + if self.experiment_page_url is not None: + body["experiment_page_url"] = self.experiment_page_url + if self.state is not None: + body["state"] = self.state + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ForecastingExperiment: + """Deserializes the ForecastingExperiment from a dictionary.""" + return cls( + experiment_id=d.get("experiment_id", None), + experiment_page_url=d.get("experiment_page_url", None), + state=_enum(d, "state", ForecastingExperimentState), + ) + + +class ForecastingExperimentState(Enum): + + CANCELLED = "CANCELLED" + FAILED = "FAILED" + PENDING = "PENDING" + RUNNING = "RUNNING" + SUCCEEDED = "SUCCEEDED" + + @dataclass class GetExperimentByNameResponse: experiment: Optional[Experiment] = None @@ -6705,6 +6954,231 @@ def update_run( return UpdateRunResponse.from_dict(res) +class ForecastingAPI: + """The Forecasting API allows you to create and get serverless forecasting experiments""" + + def __init__(self, api_client): + self._api = api_client + + def wait_get_experiment_forecasting_succeeded( + self, + experiment_id: str, + timeout=timedelta(minutes=120), + callback: Optional[Callable[[ForecastingExperiment], None]] = None, + ) -> ForecastingExperiment: + deadline = time.time() + timeout.total_seconds() + target_states = (ForecastingExperimentState.SUCCEEDED,) + failure_states = ( + 
ForecastingExperimentState.FAILED, + ForecastingExperimentState.CANCELLED, + ) + status_message = "polling..." + attempt = 1 + while time.time() < deadline: + poll = self.get_experiment(experiment_id=experiment_id) + status = poll.state + status_message = f"current status: {status}" + if status in target_states: + return poll + if callback: + callback(poll) + if status in failure_states: + msg = f"failed to reach SUCCEEDED, got {status}: {status_message}" + raise OperationFailed(msg) + prefix = f"experiment_id={experiment_id}" + sleep = attempt + if sleep > 10: + # sleep 10s max per attempt + sleep = 10 + _LOG.debug(f"{prefix}: ({status}) {status_message} (sleeping ~{sleep}s)") + time.sleep(sleep + random.random()) + attempt += 1 + raise TimeoutError(f"timed out after {timeout}: {status_message}") + + def create_experiment( + self, + train_data_path: str, + target_column: str, + time_column: str, + data_granularity_unit: str, + forecast_horizon: int, + *, + custom_weights_column: Optional[str] = None, + data_granularity_quantity: Optional[int] = None, + experiment_path: Optional[str] = None, + holiday_regions: Optional[List[str]] = None, + max_runtime: Optional[int] = None, + prediction_data_path: Optional[str] = None, + primary_metric: Optional[str] = None, + register_to: Optional[str] = None, + split_column: Optional[str] = None, + timeseries_identifier_columns: Optional[List[str]] = None, + training_frameworks: Optional[List[str]] = None, + ) -> Wait[ForecastingExperiment]: + """Create a forecasting experiment. + + Creates a serverless forecasting experiment. Returns the experiment ID. + + :param train_data_path: str + The three-level (fully qualified) name of a unity catalog table. This table serves as the training + data for the forecasting model. + :param target_column: str + Name of the column in the input training table that serves as the prediction target. The values in + this column will be used as the ground truth for model training. 
+ :param time_column: str + Name of the column in the input training table that represents the timestamp of each row. + :param data_granularity_unit: str + The time unit of the input data granularity. Together with data_granularity_quantity field, this + defines the time interval between consecutive rows in the time series data. Possible values: * 'W' + (weeks) * 'D' / 'days' / 'day' * 'hours' / 'hour' / 'hr' / 'h' * 'm' / 'minute' / 'min' / 'minutes' + / 'T' * 'S' / 'seconds' / 'sec' / 'second' * 'M' / 'month' / 'months' * 'Q' / 'quarter' / 'quarters' + * 'Y' / 'year' / 'years' + :param forecast_horizon: int + The number of time steps into the future for which predictions should be made. This value represents + a multiple of data_granularity_unit and data_granularity_quantity determining how far ahead the + model will forecast. + :param custom_weights_column: str (optional) + Name of the column in the input training table used to customize the weight for each time series to + calculate weighted metrics. + :param data_granularity_quantity: int (optional) + The quantity of the input data granularity. Together with data_granularity_unit field, this defines + the time interval between consecutive rows in the time series data. For now, only 1 second, + 1/5/10/15/30 minutes, 1 hour, 1 day, 1 week, 1 month, 1 quarter, 1 year are supported. + :param experiment_path: str (optional) + The path to the created experiment. This is the path where the experiment will be stored in the + workspace. + :param holiday_regions: List[str] (optional) + Region code(s) to consider when automatically adding holiday features. When empty, no holiday + features are added. Only supports 1 holiday region for now. + :param max_runtime: int (optional) + The maximum duration in minutes for which the experiment is allowed to run. If the experiment + exceeds this time limit it will be stopped automatically. 
+ :param prediction_data_path: str (optional) + The three-level (fully qualified) path to a unity catalog table. This table path serves to store the + predictions. + :param primary_metric: str (optional) + The evaluation metric used to optimize the forecasting model. + :param register_to: str (optional) + The three-level (fully qualified) path to a unity catalog model. This model path serves to store the + best model. + :param split_column: str (optional) + Name of the column in the input training table used for custom data splits. The values in this + column must be "train", "validate", or "test" to indicate which split each row belongs to. + :param timeseries_identifier_columns: List[str] (optional) + Name of the column in the input training table used to group the dataset to predict individual time + series + :param training_frameworks: List[str] (optional) + The list of frameworks to include for model tuning. Possible values: 'Prophet', 'ARIMA', 'DeepAR'. + An empty list will include all supported frameworks. + + :returns: + Long-running operation waiter for :class:`ForecastingExperiment`. + See :method:wait_get_experiment_forecasting_succeeded for more details. 
+ """ + body = {} + if custom_weights_column is not None: + body["custom_weights_column"] = custom_weights_column + if data_granularity_quantity is not None: + body["data_granularity_quantity"] = data_granularity_quantity + if data_granularity_unit is not None: + body["data_granularity_unit"] = data_granularity_unit + if experiment_path is not None: + body["experiment_path"] = experiment_path + if forecast_horizon is not None: + body["forecast_horizon"] = forecast_horizon + if holiday_regions is not None: + body["holiday_regions"] = [v for v in holiday_regions] + if max_runtime is not None: + body["max_runtime"] = max_runtime + if prediction_data_path is not None: + body["prediction_data_path"] = prediction_data_path + if primary_metric is not None: + body["primary_metric"] = primary_metric + if register_to is not None: + body["register_to"] = register_to + if split_column is not None: + body["split_column"] = split_column + if target_column is not None: + body["target_column"] = target_column + if time_column is not None: + body["time_column"] = time_column + if timeseries_identifier_columns is not None: + body["timeseries_identifier_columns"] = [v for v in timeseries_identifier_columns] + if train_data_path is not None: + body["train_data_path"] = train_data_path + if training_frameworks is not None: + body["training_frameworks"] = [v for v in training_frameworks] + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + op_response = self._api.do("POST", "/api/2.0/automl/create-forecasting-experiment", body=body, headers=headers) + return Wait( + self.wait_get_experiment_forecasting_succeeded, + response=CreateForecastingExperimentResponse.from_dict(op_response), + experiment_id=op_response["experiment_id"], + ) + + def create_experiment_and_wait( + self, + train_data_path: str, + target_column: str, + time_column: str, + data_granularity_unit: str, + forecast_horizon: int, + *, + custom_weights_column: Optional[str] = None, + 
data_granularity_quantity: Optional[int] = None, + experiment_path: Optional[str] = None, + holiday_regions: Optional[List[str]] = None, + max_runtime: Optional[int] = None, + prediction_data_path: Optional[str] = None, + primary_metric: Optional[str] = None, + register_to: Optional[str] = None, + split_column: Optional[str] = None, + timeseries_identifier_columns: Optional[List[str]] = None, + training_frameworks: Optional[List[str]] = None, + timeout=timedelta(minutes=120), + ) -> ForecastingExperiment: + return self.create_experiment( + custom_weights_column=custom_weights_column, + data_granularity_quantity=data_granularity_quantity, + data_granularity_unit=data_granularity_unit, + experiment_path=experiment_path, + forecast_horizon=forecast_horizon, + holiday_regions=holiday_regions, + max_runtime=max_runtime, + prediction_data_path=prediction_data_path, + primary_metric=primary_metric, + register_to=register_to, + split_column=split_column, + target_column=target_column, + time_column=time_column, + timeseries_identifier_columns=timeseries_identifier_columns, + train_data_path=train_data_path, + training_frameworks=training_frameworks, + ).result(timeout=timeout) + + def get_experiment(self, experiment_id: str) -> ForecastingExperiment: + """Get a forecasting experiment. + + Public RPC to get forecasting experiment + + :param experiment_id: str + The unique ID of a forecasting experiment + + :returns: :class:`ForecastingExperiment` + """ + + headers = { + "Accept": "application/json", + } + + res = self._api.do("GET", f"/api/2.0/automl/get-forecasting-experiment/{experiment_id}", headers=headers) + return ForecastingExperiment.from_dict(res) + + class ModelRegistryAPI: """Note: This API reference documents APIs for the Workspace Model Registry. Databricks recommends using [Models in Unity Catalog](/api/workspace/registeredmodels) instead. 
Models in Unity Catalog provides diff --git a/databricks/sdk/service/oauth2.py b/databricks/sdk/service/oauth2.py index 366f282f4..928610d04 100755 --- a/databricks/sdk/service/oauth2.py +++ b/databricks/sdk/service/oauth2.py @@ -776,13 +776,6 @@ class OidcFederationPolicy: endpoint. Databricks strongly recommends relying on your issuer’s well known endpoint for discovering public keys.""" - jwks_uri: Optional[str] = None - """URL of the public keys used to validate the signature of federated tokens, in JWKS format. Most - use cases should not need to specify this field. If jwks_uri and jwks_json are both unspecified - (recommended), Databricks automatically fetches the public keys from your issuer’s well known - endpoint. Databricks strongly recommends relying on your issuer’s well known endpoint for - discovering public keys.""" - subject: Optional[str] = None """The required token subject, as specified in the subject claim of federated tokens. Must be specified for service principal federation policies. 
Must not be specified for account @@ -800,8 +793,6 @@ def as_dict(self) -> dict: body["issuer"] = self.issuer if self.jwks_json is not None: body["jwks_json"] = self.jwks_json - if self.jwks_uri is not None: - body["jwks_uri"] = self.jwks_uri if self.subject is not None: body["subject"] = self.subject if self.subject_claim is not None: @@ -817,8 +808,6 @@ def as_shallow_dict(self) -> dict: body["issuer"] = self.issuer if self.jwks_json is not None: body["jwks_json"] = self.jwks_json - if self.jwks_uri is not None: - body["jwks_uri"] = self.jwks_uri if self.subject is not None: body["subject"] = self.subject if self.subject_claim is not None: @@ -832,7 +821,6 @@ def from_dict(cls, d: Dict[str, Any]) -> OidcFederationPolicy: audiences=d.get("audiences", None), issuer=d.get("issuer", None), jwks_json=d.get("jwks_json", None), - jwks_uri=d.get("jwks_uri", None), subject=d.get("subject", None), subject_claim=d.get("subject_claim", None), ) diff --git a/docs/account/iam/service_principals.rst b/docs/account/iam/service_principals.rst index 2823c8d31..8899ee8ed 100644 --- a/docs/account/iam/service_principals.rst +++ b/docs/account/iam/service_principals.rst @@ -23,10 +23,7 @@ a = AccountClient() - sp_create = a.service_principals.create(active=True, display_name=f"sdk-{time.time_ns()}") - - # cleanup - a.service_principals.delete(id=sp_create.id) + spn = a.service_principals.create(display_name=f"sdk-{time.time_ns()}") Create a service principal. diff --git a/docs/account/iam/workspace_assignment.rst b/docs/account/iam/workspace_assignment.rst index 6f4c66be5..745bd75da 100644 --- a/docs/account/iam/workspace_assignment.rst +++ b/docs/account/iam/workspace_assignment.rst @@ -47,9 +47,9 @@ a = AccountClient() - workspace_id = os.environ["TEST_WORKSPACE_ID"] + workspace_id = os.environ["DUMMY_WORKSPACE_ID"] - all = a.workspace_assignment.list(list=workspace_id) + all = a.workspace_assignment.list(workspace_id=workspace_id) Get permission assignments. 
@@ -80,9 +80,9 @@ spn_id = spn.id - workspace_id = os.environ["DUMMY_WORKSPACE_ID"] + workspace_id = os.environ["TEST_WORKSPACE_ID"] - _ = a.workspace_assignment.update( + a.workspace_assignment.update( workspace_id=workspace_id, principal_id=spn_id, permissions=[iam.WorkspacePermission.USER], diff --git a/docs/account/provisioning/credentials.rst b/docs/account/provisioning/credentials.rst index d023d4f1f..e307588f1 100644 --- a/docs/account/provisioning/credentials.rst +++ b/docs/account/provisioning/credentials.rst @@ -24,15 +24,15 @@ a = AccountClient() - role = a.credentials.create( + creds = a.credentials.create( credentials_name=f"sdk-{time.time_ns()}", aws_credentials=provisioning.CreateCredentialAwsCredentials( - sts_role=provisioning.CreateCredentialStsRole(role_arn=os.environ["TEST_CROSSACCOUNT_ARN"]) + sts_role=provisioning.CreateCredentialStsRole(role_arn=os.environ["TEST_LOGDELIVERY_ARN"]) ), ) # cleanup - a.credentials.delete(credentials_id=role.credentials_id) + a.credentials.delete(credentials_id=creds.credentials_id) Create credential configuration. diff --git a/docs/account/provisioning/storage.rst b/docs/account/provisioning/storage.rst index b8e144f8c..010795885 100644 --- a/docs/account/provisioning/storage.rst +++ b/docs/account/provisioning/storage.rst @@ -16,7 +16,6 @@ .. code-block:: - import os import time from databricks.sdk import AccountClient @@ -24,13 +23,13 @@ a = AccountClient() - storage = a.storage.create( + bucket = a.storage.create( storage_configuration_name=f"sdk-{time.time_ns()}", - root_bucket_info=provisioning.RootBucketInfo(bucket_name=os.environ["TEST_ROOT_BUCKET"]), + root_bucket_info=provisioning.RootBucketInfo(bucket_name=f"sdk-{time.time_ns()}"), ) # cleanup - a.storage.delete(storage_configuration_id=storage.storage_configuration_id) + a.storage.delete(storage_configuration_id=bucket.storage_configuration_id) Create new storage configuration. 
diff --git a/docs/dbdataclasses/compute.rst b/docs/dbdataclasses/compute.rst index 3be80b79c..81fc85e30 100644 --- a/docs/dbdataclasses/compute.rst +++ b/docs/dbdataclasses/compute.rst @@ -405,6 +405,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: DiskTypeAzureDiskVolumeType + All Azure Disk types that Databricks supports. See https://docs.microsoft.com/en-us/azure/storage/storage-about-disks-and-vhds-linux#types-of-disks + .. py:attribute:: PREMIUM_LRS :value: "PREMIUM_LRS" @@ -413,6 +415,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: DiskTypeEbsVolumeType + All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. + .. py:attribute:: GENERAL_PURPOSE_SSD :value: "GENERAL_PURPOSE_SSD" @@ -729,8 +733,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: InstancePoolAwsAttributesAvailability - Availability type used for the spot nodes. - The default value is defined by InstancePoolConf.instancePoolDefaultAwsAvailability + The set of AWS availability types supported when setting up nodes for a cluster. .. py:attribute:: ON_DEMAND :value: "ON_DEMAND" @@ -744,8 +747,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: InstancePoolAzureAttributesAvailability - Shows the Availability type used for the spot nodes. - The default value is defined by InstancePoolConf.instancePoolDefaultAzureAvailability + The set of Azure availability types supported when setting up nodes for a cluster. .. py:attribute:: ON_DEMAND_AZURE :value: "ON_DEMAND_AZURE" @@ -785,7 +787,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: InstancePoolState - Current state of the instance pool. + The state of a Cluster. 
The current allowable state transitions are as follows: + - ``ACTIVE`` -> ``STOPPED`` - ``ACTIVE`` -> ``DELETED`` - ``STOPPED`` -> ``ACTIVE`` - ``STOPPED`` -> ``DELETED`` .. py:attribute:: ACTIVE :value: "ACTIVE" diff --git a/docs/dbdataclasses/dashboards.rst b/docs/dbdataclasses/dashboards.rst index b68b1f1b6..d1639d266 100644 --- a/docs/dbdataclasses/dashboards.rst +++ b/docs/dbdataclasses/dashboards.rst @@ -133,6 +133,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: CONTEXT_EXCEEDED_EXCEPTION :value: "CONTEXT_EXCEEDED_EXCEPTION" + .. py:attribute:: COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION + :value: "COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION" + .. py:attribute:: COULD_NOT_GET_UC_SCHEMA_EXCEPTION :value: "COULD_NOT_GET_UC_SCHEMA_EXCEPTION" @@ -246,7 +249,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: MessageStatus - MessageStatus. The possible values are: * `FETCHING_METADATA`: Fetching metadata from the data sources. * `FILTERING_CONTEXT`: Running smart context step to determine relevant context. * `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is completed. Results are in the `attachments` field. Get the SQL query result by calling [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to rerun the query. * `CANCELLED`: Message has been cancelled. + MessageStatus. The possible values are: * `FETCHING_METADATA`: Fetching metadata from the data sources. 
* `FILTERING_CONTEXT`: Running smart context step to determine relevant context. * `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is completed. Results are in the `attachments` field. Get the SQL query result by calling [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to rerun the query. Rerun the SQL query result by calling [executeMessageAttachmentQuery](:method:genie/executeMessageAttachmentQuery) API. * `CANCELLED`: Message has been cancelled. .. py:attribute:: ASKING_AI :value: "ASKING_AI" diff --git a/docs/dbdataclasses/ml.rst b/docs/dbdataclasses/ml.rst index b176e56c4..860a4ffbc 100644 --- a/docs/dbdataclasses/ml.rst +++ b/docs/dbdataclasses/ml.rst @@ -92,6 +92,14 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: CreateForecastingExperimentRequest + :members: + :undoc-members: + +.. autoclass:: CreateForecastingExperimentResponse + :members: + :undoc-members: + .. autoclass:: CreateModelRequest :members: :undoc-members: @@ -263,6 +271,27 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: ForecastingExperiment + :members: + :undoc-members: + +.. py:class:: ForecastingExperimentState + + .. py:attribute:: CANCELLED + :value: "CANCELLED" + + .. py:attribute:: FAILED + :value: "FAILED" + + .. py:attribute:: PENDING + :value: "PENDING" + + .. 
py:attribute:: RUNNING + :value: "RUNNING" + + .. py:attribute:: SUCCEEDED + :value: "SUCCEEDED" + .. autoclass:: GetExperimentByNameResponse :members: :undoc-members: diff --git a/docs/workspace/catalog/catalogs.rst b/docs/workspace/catalog/catalogs.rst index 60959cad4..2505551cd 100644 --- a/docs/workspace/catalog/catalogs.rst +++ b/docs/workspace/catalog/catalogs.rst @@ -24,10 +24,10 @@ w = WorkspaceClient() - created = w.catalogs.create(name=f"sdk-{time.time_ns()}") + created_catalog = w.catalogs.create(name=f"sdk-{time.time_ns()}") # cleanup - w.catalogs.delete(name=created.name, force=True) + w.catalogs.delete(name=created_catalog.name, force=True) Create a catalog. diff --git a/docs/workspace/catalog/external_locations.rst b/docs/workspace/catalog/external_locations.rst index 980467306..5828bf245 100644 --- a/docs/workspace/catalog/external_locations.rst +++ b/docs/workspace/catalog/external_locations.rst @@ -30,22 +30,20 @@ w = WorkspaceClient() - storage_credential = w.storage_credentials.create( + credential = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), - comment="created via SDK", ) - external_location = w.external_locations.create( + created = w.external_locations.create( name=f"sdk-{time.time_ns()}", - credential_name=storage_credential.name, - comment="created via SDK", - url="s3://" + os.environ["TEST_BUCKET"] + "/" + f"sdk-{time.time_ns()}", + credential_name=credential.name, + url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f"sdk-{time.time_ns()}"), ) # cleanup - w.storage_credentials.delete(name=storage_credential.name) - w.external_locations.delete(name=external_location.name) + w.storage_credentials.delete(name=credential.name) + w.external_locations.delete(name=created.name) Create an external location. 
@@ -109,20 +107,20 @@ credential = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", - aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), ) created = w.external_locations.create( name=f"sdk-{time.time_ns()}", credential_name=credential.name, - url=f's3://{os.environ["TEST_BUCKET"]}/sdk-{time.time_ns()}', + url="s3://%s/%s" % (os.environ["TEST_BUCKET"], f"sdk-{time.time_ns()}"), ) - _ = w.external_locations.get(get=created.name) + _ = w.external_locations.get(name=created.name) # cleanup - w.storage_credentials.delete(delete=credential.name) - w.external_locations.delete(delete=created.name) + w.storage_credentials.delete(name=credential.name) + w.external_locations.delete(name=created.name) Get an external location. @@ -146,11 +144,10 @@ .. code-block:: from databricks.sdk import WorkspaceClient - from databricks.sdk.service import catalog w = WorkspaceClient() - all = w.external_locations.list(catalog.ListExternalLocationsRequest()) + all = w.external_locations.list() List external locations. diff --git a/docs/workspace/catalog/storage_credentials.rst b/docs/workspace/catalog/storage_credentials.rst index ea2aece71..9a5ed0a46 100644 --- a/docs/workspace/catalog/storage_credentials.rst +++ b/docs/workspace/catalog/storage_credentials.rst @@ -30,13 +30,13 @@ w = WorkspaceClient() - created = w.storage_credentials.create( + credential = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", - aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), ) # cleanup - w.storage_credentials.delete(delete=created.name) + w.storage_credentials.delete(name=credential.name) Create a storage credential. 
@@ -96,13 +96,13 @@ created = w.storage_credentials.create( name=f"sdk-{time.time_ns()}", - aws_iam_role=catalog.AwsIamRoleRequest(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), + aws_iam_role=catalog.AwsIamRole(role_arn=os.environ["TEST_METASTORE_DATA_ACCESS_ARN"]), ) - by_name = w.storage_credentials.get(name=created.name) + by_name = w.storage_credentials.get(get=created.name) # cleanup - w.storage_credentials.delete(name=created.name) + w.storage_credentials.delete(delete=created.name) Get a credential. @@ -123,10 +123,11 @@ .. code-block:: from databricks.sdk import WorkspaceClient + from databricks.sdk.service import catalog w = WorkspaceClient() - all = w.storage_credentials.list() + all = w.storage_credentials.list(catalog.ListStorageCredentialsRequest()) List credentials. diff --git a/docs/workspace/compute/clusters.rst b/docs/workspace/compute/clusters.rst index 528cff321..79d025c82 100644 --- a/docs/workspace/compute/clusters.rst +++ b/docs/workspace/compute/clusters.rst @@ -700,11 +700,10 @@ .. code-block:: from databricks.sdk import WorkspaceClient - from databricks.sdk.service import compute w = WorkspaceClient() - all = w.clusters.list(compute.ListClustersRequest()) + nodes = w.clusters.list_node_types() List clusters. diff --git a/docs/workspace/dashboards/genie.rst b/docs/workspace/dashboards/genie.rst index 6c0e91751..eb92d299f 100644 --- a/docs/workspace/dashboards/genie.rst +++ b/docs/workspace/dashboards/genie.rst @@ -35,7 +35,8 @@ Execute message attachment SQL query. - Execute the SQL for a message query attachment. + Execute the SQL for a message query attachment. Use this API when the query attachment has expired and + needs to be re-executed. :param space_id: str Genie space ID @@ -51,7 +52,7 @@ .. py:method:: execute_message_query(space_id: str, conversation_id: str, message_id: str) -> GenieGetMessageQueryResultResponse - Execute SQL query in a conversation message. + [Deprecated] Execute SQL query in a conversation message. 
Execute the SQL query in the message. @@ -119,7 +120,7 @@ .. py:method:: get_message_query_result_by_attachment(space_id: str, conversation_id: str, message_id: str, attachment_id: str) -> GenieGetMessageQueryResultResponse - [deprecated] Get conversation message SQL query result. + [Deprecated] Get conversation message SQL query result. Get the result of SQL query if the message has a query attachment. This is only available if a message has a query attachment and the message status is `EXECUTING_QUERY` OR `COMPLETED`. @@ -138,9 +139,9 @@ .. py:method:: get_space(space_id: str) -> GenieSpace - Get details of a Genie Space. + Get Genie Space. - Get a Genie Space. + Get details of a Genie Space. :param space_id: str The ID associated with the Genie space diff --git a/docs/workspace/iam/current_user.rst b/docs/workspace/iam/current_user.rst index 1df3adf9f..bf739025c 100644 --- a/docs/workspace/iam/current_user.rst +++ b/docs/workspace/iam/current_user.rst @@ -17,7 +17,7 @@ w = WorkspaceClient() - me2 = w.current_user.me() + me = w.current_user.me() Get current user info. diff --git a/docs/workspace/iam/groups.rst b/docs/workspace/iam/groups.rst index 8eb4ccbe2..14306158d 100644 --- a/docs/workspace/iam/groups.rst +++ b/docs/workspace/iam/groups.rst @@ -71,6 +71,9 @@ group = w.groups.create(display_name=f"sdk-{time.time_ns()}") w.groups.delete(id=group.id) + + # cleanup + w.groups.delete(id=group.id) Delete a group. diff --git a/docs/workspace/iam/permissions.rst b/docs/workspace/iam/permissions.rst index 8d504eb37..6cd5b269b 100644 --- a/docs/workspace/iam/permissions.rst +++ b/docs/workspace/iam/permissions.rst @@ -71,7 +71,7 @@ obj = w.workspace.get_status(path=notebook_path) - levels = w.permissions.get_permission_levels(request_object_type="notebooks", request_object_id="%d" % (obj.object_id)) + _ = w.permissions.get(request_object_type="notebooks", request_object_id="%d" % (obj.object_id)) Get object permissions. 
diff --git a/docs/workspace/jobs/jobs.rst b/docs/workspace/jobs/jobs.rst index 923b05901..184ff9bd9 100644 --- a/docs/workspace/jobs/jobs.rst +++ b/docs/workspace/jobs/jobs.rst @@ -362,21 +362,23 @@ w.clusters.ensure_cluster_is_running(os.environ["DATABRICKS_CLUSTER_ID"]) and os.environ["DATABRICKS_CLUSTER_ID"] ) - run = w.jobs.submit( - run_name=f"sdk-{time.time_ns()}", + created_job = w.jobs.create( + name=f"sdk-{time.time_ns()}", tasks=[ - jobs.SubmitTask( + jobs.Task( + description="test", existing_cluster_id=cluster_id, notebook_task=jobs.NotebookTask(notebook_path=notebook_path), - task_key=f"sdk-{time.time_ns()}", + task_key="test", + timeout_seconds=0, ) ], - ).result() + ) - output = w.jobs.get_run_output(run_id=run.tasks[0].run_id) + by_id = w.jobs.get(job_id=created_job.job_id) # cleanup - w.jobs.delete_run(run_id=run.run_id) + w.jobs.delete(job_id=created_job.job_id) Get a single job. diff --git a/docs/workspace/ml/forecasting.rst b/docs/workspace/ml/forecasting.rst new file mode 100644 index 000000000..5f5bddd8a --- /dev/null +++ b/docs/workspace/ml/forecasting.rst @@ -0,0 +1,87 @@ +``w.forecasting``: Forecasting +============================== +.. currentmodule:: databricks.sdk.service.ml + +.. py:class:: ForecastingAPI + + The Forecasting API allows you to create and get serverless forecasting experiments + + .. py:method:: create_experiment(train_data_path: str, target_column: str, time_column: str, data_granularity_unit: str, forecast_horizon: int [, custom_weights_column: Optional[str], data_granularity_quantity: Optional[int], experiment_path: Optional[str], holiday_regions: Optional[List[str]], max_runtime: Optional[int], prediction_data_path: Optional[str], primary_metric: Optional[str], register_to: Optional[str], split_column: Optional[str], timeseries_identifier_columns: Optional[List[str]], training_frameworks: Optional[List[str]]]) -> Wait[ForecastingExperiment] + + Create a forecasting experiment. 
+ + Creates a serverless forecasting experiment. Returns the experiment ID. + + :param train_data_path: str + The three-level (fully qualified) name of a unity catalog table. This table serves as the training + data for the forecasting model. + :param target_column: str + Name of the column in the input training table that serves as the prediction target. The values in + this column will be used as the ground truth for model training. + :param time_column: str + Name of the column in the input training table that represents the timestamp of each row. + :param data_granularity_unit: str + The time unit of the input data granularity. Together with data_granularity_quantity field, this + defines the time interval between consecutive rows in the time series data. Possible values: * 'W' + (weeks) * 'D' / 'days' / 'day' * 'hours' / 'hour' / 'hr' / 'h' * 'm' / 'minute' / 'min' / 'minutes' + / 'T' * 'S' / 'seconds' / 'sec' / 'second' * 'M' / 'month' / 'months' * 'Q' / 'quarter' / 'quarters' + * 'Y' / 'year' / 'years' + :param forecast_horizon: int + The number of time steps into the future for which predictions should be made. This value represents + a multiple of data_granularity_unit and data_granularity_quantity determining how far ahead the + model will forecast. + :param custom_weights_column: str (optional) + Name of the column in the input training table used to customize the weight for each time series to + calculate weighted metrics. + :param data_granularity_quantity: int (optional) + The quantity of the input data granularity. Together with data_granularity_unit field, this defines + the time interval between consecutive rows in the time series data. For now, only 1 second, + 1/5/10/15/30 minutes, 1 hour, 1 day, 1 week, 1 month, 1 quarter, 1 year are supported. + :param experiment_path: str (optional) + The path to the created experiment. This is the path where the experiment will be stored in the + workspace. 
+ :param holiday_regions: List[str] (optional) + Region code(s) to consider when automatically adding holiday features. When empty, no holiday + features are added. Only supports 1 holiday region for now. + :param max_runtime: int (optional) + The maximum duration in minutes for which the experiment is allowed to run. If the experiment + exceeds this time limit it will be stopped automatically. + :param prediction_data_path: str (optional) + The three-level (fully qualified) path to a unity catalog table. This table path serves to store the + predictions. + :param primary_metric: str (optional) + The evaluation metric used to optimize the forecasting model. + :param register_to: str (optional) + The three-level (fully qualified) path to a unity catalog model. This model path serves to store the + best model. + :param split_column: str (optional) + Name of the column in the input training table used for custom data splits. The values in this + column must be "train", "validate", or "test" to indicate which split each row belongs to. + :param timeseries_identifier_columns: List[str] (optional) + Name of the column in the input training table used to group the dataset to predict individual time + series + :param training_frameworks: List[str] (optional) + The list of frameworks to include for model tuning. Possible values: 'Prophet', 'ARIMA', 'DeepAR'. + An empty list will include all supported frameworks. + + :returns: + Long-running operation waiter for :class:`ForecastingExperiment`. + See :method:wait_get_experiment_forecasting_succeeded for more details. + + + .. 
py:method:: create_experiment_and_wait(train_data_path: str, target_column: str, time_column: str, data_granularity_unit: str, forecast_horizon: int [, custom_weights_column: Optional[str], data_granularity_quantity: Optional[int], experiment_path: Optional[str], holiday_regions: Optional[List[str]], max_runtime: Optional[int], prediction_data_path: Optional[str], primary_metric: Optional[str], register_to: Optional[str], split_column: Optional[str], timeseries_identifier_columns: Optional[List[str]], training_frameworks: Optional[List[str]], timeout: datetime.timedelta = 2:00:00]) -> ForecastingExperiment + + + .. py:method:: get_experiment(experiment_id: str) -> ForecastingExperiment + + Get a forecasting experiment. + + Public RPC to get forecasting experiment + + :param experiment_id: str + The unique ID of a forecasting experiment + + :returns: :class:`ForecastingExperiment` + + + .. py:method:: wait_get_experiment_forecasting_succeeded(experiment_id: str, timeout: datetime.timedelta = 2:00:00, callback: Optional[Callable[[ForecastingExperiment], None]]) -> ForecastingExperiment diff --git a/docs/workspace/ml/index.rst b/docs/workspace/ml/index.rst index 1a713eb57..9114a2f19 100644 --- a/docs/workspace/ml/index.rst +++ b/docs/workspace/ml/index.rst @@ -8,4 +8,5 @@ Create and manage experiments, features, and other machine learning artifacts :maxdepth: 1 experiments + forecasting model_registry \ No newline at end of file diff --git a/docs/workspace/ml/model_registry.rst b/docs/workspace/ml/model_registry.rst index 0f6b49ac8..b56fba95f 100644 --- a/docs/workspace/ml/model_registry.rst +++ b/docs/workspace/ml/model_registry.rst @@ -94,7 +94,7 @@ w = WorkspaceClient() - model = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") + created = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") Create a model. 
@@ -127,7 +127,7 @@ model = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") - mv = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") Create a model version. @@ -773,14 +773,13 @@ w = WorkspaceClient() - model = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") + created = w.model_registry.create_model(name=f"sdk-{time.time_ns()}") - created = w.model_registry.create_model_version(name=model.registered_model.name, source="dbfs:/tmp") + model = w.model_registry.get_model(name=created.registered_model.name) - w.model_registry.update_model_version( + w.model_registry.update_model( + name=model.registered_model_databricks.name, description=f"sdk-{time.time_ns()}", - name=created.model_version.name, - version=created.model_version.version, ) Update model. diff --git a/docs/workspace/sharing/providers.rst b/docs/workspace/sharing/providers.rst index 263545400..d78dd62a0 100644 --- a/docs/workspace/sharing/providers.rst +++ b/docs/workspace/sharing/providers.rst @@ -108,12 +108,25 @@ .. code-block:: + import time + from databricks.sdk import WorkspaceClient - from databricks.sdk.service import sharing w = WorkspaceClient() - all = w.providers.list(sharing.ListProvidersRequest()) + public_share_recipient = """{ + "shareCredentialsVersion":1, + "bearerToken":"dapiabcdefghijklmonpqrstuvwxyz", + "endpoint":"https://sharing.delta.io/delta-sharing/" + } + """ + + created = w.providers.create(name=f"sdk-{time.time_ns()}", recipient_profile_str=public_share_recipient) + + shares = w.providers.list_shares(name=created.name) + + # cleanup + w.providers.delete(name=created.name) List providers. 
diff --git a/docs/workspace/sql/queries.rst b/docs/workspace/sql/queries.rst index 4cc9b5b52..fe5442485 100644 --- a/docs/workspace/sql/queries.rst +++ b/docs/workspace/sql/queries.rst @@ -29,7 +29,7 @@ display_name=f"sdk-{time.time_ns()}", warehouse_id=srcs[0].warehouse_id, description="test query from Go SDK", - query_text="SHOW TABLES", + query_text="SELECT 1", ) ) diff --git a/docs/workspace/workspace/workspace.rst b/docs/workspace/workspace/workspace.rst index 3eae91432..abfc30860 100644 --- a/docs/workspace/workspace/workspace.rst +++ b/docs/workspace/workspace/workspace.rst @@ -188,7 +188,7 @@ content=base64.b64encode(("CREATE LIVE TABLE dlt_sample AS SELECT 1").encode()).decode(), format=workspace.ImportFormat.SOURCE, language=workspace.Language.SQL, - overwrite=True, + overwrite=True, path=notebook_path, )