Merged
35 commits
dae9700
feat(inference): migration deployment to v1
Laure-di Apr 24, 2025
3359869
feat(inference): BYOM support
Laure-di Apr 25, 2025
e401f22
add sweeper
Laure-di Apr 25, 2025
8bc4617
manage id from regional or not
Laure-di Apr 25, 2025
533f8e8
remove error_message
Laure-di Apr 25, 2025
7512e43
fix linter
Laure-di Apr 25, 2025
4f129e4
last cassette
Laure-di Apr 28, 2025
9beb5b4
fix documentation
Laure-di Apr 28, 2025
40a9392
fix documentation lint
Laure-di Apr 28, 2025
c910960
remove comment
Laure-di Apr 28, 2025
c53fd23
Update docs/resources/inference_custom_model.md
Laure-di Apr 28, 2025
54e9077
change model_id format
Laure-di Apr 28, 2025
7233d94
use of dsf.locality
Laure-di Apr 29, 2025
dbb2b2e
ResourceCustomModelDelete return right err and testAccCheckCustomMode…
Laure-di Apr 29, 2025
cadc3f2
fix(doc): add import part and fix typo
Laure-di Apr 29, 2025
91113f7
fix(doc): deployment required attribute
Laure-di Apr 29, 2025
b02faf7
fix(inference): use of existing function cast
Laure-di Apr 29, 2025
8c62809
Update docs/resources/inference_custom_model.md
Laure-di Apr 29, 2025
2c94ded
skip tests until further notice
Laure-di Apr 29, 2025
17b9663
activate tests
Laure-di Apr 30, 2025
fae6676
fix(inference): rename resource from custom_model to model
Laure-di Apr 30, 2025
92f7cfd
update sdk-go
Laure-di Apr 30, 2025
c95dc22
remove unecessary file
Laure-di Apr 30, 2025
8755556
fix(doc): put real URL and more context
Laure-di Apr 30, 2025
f890be1
add support model data-source
Laure-di May 12, 2025
d2c516c
testing
Laure-di May 14, 2025
c6b5ec7
add test
Laure-di May 14, 2025
7ac52b7
update doc and tests
Laure-di May 15, 2025
1f93d2e
fix linter
Laure-di May 15, 2025
fc80ab4
fix linter
Laure-di May 15, 2025
aa15cf5
remove custom reference
Laure-di May 15, 2025
f6fa71d
update cassette
Laure-di May 15, 2025
715ed4e
update cassette deployment with datasource
Laure-di May 15, 2025
cfaa353
Merge branch 'master' into migration-inference-v1
remyleone May 15, 2025
2720cd0
Merge branch 'master' into migration-inference-v1
remyleone May 15, 2025
45 changes: 45 additions & 0 deletions docs/data-sources/inference_model.md
@@ -0,0 +1,45 @@
---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_model"
---

# scaleway_inference_model

The `scaleway_inference_model` data source allows you to retrieve information about an inference model available in the Scaleway Inference API, either by providing the model's `name` or its `model_id`.

## Example Usage

### Basic

```hcl
data "scaleway_inference_model" "my_model" {
name = "meta/llama-3.1-8b-instruct:fp8"
}
```
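
### Use model ID

Alternatively, look up the model by its ID. A minimal sketch; the ID below is a placeholder to replace with a real, locality-prefixed model ID:

```hcl
data "scaleway_inference_model" "my_model_by_id" {
  model_id = "fr-par/11111111-1111-1111-1111-111111111111" # placeholder model ID
}
```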

## Argument Reference

You must provide either `name` or `model_id`, but not both.

- `name` (Optional, conflicts with `model_id`) The fully qualified name of the model to look up (e.g., `meta/llama-3.1-8b-instruct:fp8`). The provider will search for a model with an exact name match in the selected region and project.
- `model_id` (Optional, conflicts with `name`) The ID of the model to retrieve. Must be a valid UUID with its locality prefix (e.g., `fr-par/11111111-1111-1111-1111-111111111111`).
- `project_id` (Optional) The project ID to use when listing models. If not provided, the provider default project is used.
- `region` (Optional) The region where the model is hosted. If not set, the provider default region is used.

## Attributes Reference

In addition to the input arguments above, the following attributes are exported:

- `id` - The unique identifier of the model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., `ready`, `error`).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
- `node_type_name` - The type of node supported.
- `quantization` - A list of supported quantization options, including:
- `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
- `allowed` - Whether this quantization is allowed.
- `max_context_size` - Maximum context length supported by this quantization.
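
As an illustration of consuming the nested `nodes_support` attribute, here is a minimal sketch (it assumes the `my_model` data source from the example above):

```hcl
output "supported_node_types" {
  # Node types this model can be deployed on
  value = [for ns in data.scaleway_inference_model.my_model.nodes_support : ns.node_type_name]
}
```
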
10 changes: 7 additions & 3 deletions docs/resources/inference_deployment.md
@@ -13,10 +13,14 @@ For more information, see the [API documentation](https://www.scaleway.com/en/de
### Basic

```terraform
data "scaleway_inference_model" "my_model" {
name = "meta/llama-3.1-8b-instruct:fp8"
}

resource "scaleway_inference_deployment" "deployment" {
name = "tf-inference-deployment"
node_type = "L4"
model_name = "meta/llama-3.1-8b-instruct:fp8"
model_name = data.scaleway_inference_model.my_model.id
public_endpoint {
is_enabled = true
}
@@ -26,7 +30,7 @@ resource "scaleway_inference_deployment" "deployment" {

## Argument Reference

- `model_name` - (Required) The model name to use for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`)
- `model_id` - (Required) The ID of the model to use for the deployment. Model IDs can be retrieved from the `scaleway_inference_model` data source or with Scaleway's CLI (`scw inference model list`).
- `node_type` - (Required) The node type to use for the deployment. Node types can be found using Scaleway's CLI (`scw inference node-type list`).
- `name` - (Optional) The deployment name.
- `accept_eula` - (Optional) Some models (e.g., Meta Llama) require end-user license agreements. Set `true` to accept.
@@ -48,7 +52,7 @@ resource "scaleway_inference_deployment" "deployment" {
In addition to all arguments above, the following attributes are exported:

- `id` - The ID of the deployment.
- `model_id` - The model id used for the deployment.
- `model_name` - The model name used for the deployment. Model names can be found in the Console or using Scaleway's CLI (`scw inference model list`).
- `size` - The size of the pool.
- `status` - The status of the deployment.
- `created_at` - The date and time of the creation of the deployment.
76 changes: 76 additions & 0 deletions docs/resources/inference_model.md
@@ -0,0 +1,76 @@
---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_model"
---

# Resource: scaleway_inference_model

The `scaleway_inference_model` resource allows you to upload and manage inference models in the Scaleway Inference ecosystem. Once registered, a model can be used in any `scaleway_inference_deployment` resource.

## Example Usage

### Basic

```terraform
resource "scaleway_inference_model" "test" {
name = "my-awesome-model"
url = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
secret = "my-secret-token"
}
```

### Deploy your own model on Managed Inference

```terraform
resource "scaleway_inference_model" "my_model" {
name = "my-awesome-model"
url = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
secret = "my-secret-token"
}

resource "scaleway_inference_deployment" "my_deployment" {
name = "test-inference-deployment-basic"
node_type = "H100" # replace with your node type
model_id = scaleway_inference_model.my_model.id

public_endpoint {
is_enabled = true
}

accept_eula = true
}
```
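
### Using a sensitive variable for the secret

To keep credentials out of your configuration files, the token can be supplied through a sensitive variable rather than a literal. A sketch, with an illustrative variable name:

```terraform
variable "hf_token" {
  type        = string
  sensitive   = true
  description = "Access token used to pull the model (illustrative variable)"
}

resource "scaleway_inference_model" "private_model" {
  name   = "my-awesome-model"
  url    = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
  secret = var.hf_token
}
```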

## Argument Reference

- `name` - (Required) The name of the model. This must be unique within the project.
- `url` - (Required) The HTTPS source URL from which the model will be downloaded. This is typically a Hugging Face repository URL (e.g., `https://huggingface.co/agentica-org/DeepCoder-14B-Preview`). The URL must be publicly accessible or require valid credentials provided via `secret`.
- `secret` - (Optional, Sensitive) Authentication token used to pull the model from a private or gated URL (e.g., a Hugging Face access token with read permission).
- `region` - (Defaults to [provider](../index.md#region) `region`) The [region](../guides/regions_and_zones.md#regions) in which the model is created.
- `project_id` - (Defaults to [provider](../index.md#project_id) `project_id`) The ID of the project the model is associated with.

## Attributes Reference

In addition to all arguments above, the following attributes are exported:

- `id` - The unique identifier of the model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., `ready`, `error`).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
- `node_type_name` - The type of node supported.
- `quantization` - A list of supported quantization options, including:
- `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
- `allowed` - Whether this quantization is allowed.
- `max_context_size` - Maximum context length supported by this quantization.

## Import

Models can be imported using `{region}/{id}`, as shown below:

```bash
terraform import scaleway_inference_model.my_model fr-par/11111111-1111-1111-1111-111111111111
```
2 changes: 2 additions & 0 deletions internal/provider/provider.go
@@ -167,6 +167,7 @@ func Provider(config *Config) plugin.ProviderFunc {
"scaleway_iam_ssh_key": iam.ResourceSSKKey(),
"scaleway_iam_user": iam.ResourceUser(),
"scaleway_inference_deployment": inference.ResourceDeployment(),
"scaleway_inference_model": inference.ResourceModel(),
"scaleway_instance_image": instance.ResourceImage(),
"scaleway_instance_ip": instance.ResourceIP(),
"scaleway_instance_ip_reverse_dns": instance.ResourceIPReverseDNS(),
@@ -273,6 +274,7 @@ func Provider(config *Config) plugin.ProviderFunc {
"scaleway_iam_ssh_key": iam.DataSourceSSHKey(),
"scaleway_iam_user": iam.DataSourceUser(),
"scaleway_iam_api_key": iam.DataSourceAPIKey(),
"scaleway_inference_model": inference.DataSourceModel(),
"scaleway_instance_image": instance.DataSourceImage(),
"scaleway_instance_ip": instance.DataSourceIP(),
"scaleway_instance_placement_group": instance.DataSourcePlacementGroup(),
54 changes: 35 additions & 19 deletions internal/services/inference/deployment.go
@@ -6,9 +6,11 @@ import (
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1"
"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
"github.com/scaleway/scaleway-sdk-go/scw"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/dsf"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/types"
@@ -43,17 +45,20 @@ func ResourceDeployment() *schema.Resource {
"node_type": {
Type: schema.TypeString,
Required: true,
ForceNew: true,
Description: "The node type to use for the deployment",
},
"model_name": {
Type: schema.TypeString,
Required: true,
Computed: true,
Description: "The model name to use for the deployment",
},
"model_id": {
Type: schema.TypeString,
Computed: true,
Description: "The model id used for the deployment",
Type: schema.TypeString,
Required: true,
Description: "The model id used for the deployment",
ForceNew: true,
DiffSuppressFunc: dsf.Locality,
},
"accept_eula": {
Type: schema.TypeBool,
@@ -70,16 +75,21 @@
"min_size": {
Type: schema.TypeInt,
Optional: true,
Computed: true,
Description: "The minimum size of the pool",
ValidateFunc: validation.IntAtLeast(1),
Default: 1,
},
"max_size": {
Type: schema.TypeInt,
Optional: true,
Computed: true,
Description: "The maximum size of the pool",
ValidateFunc: validation.IntAtLeast(1),
Default: 1,
},
"quantization": {
Type: schema.TypeInt,
Optional: true,
Description: "The number of bits each model parameter should be quantized to",
},
"size": {
Type: schema.TypeInt,
@@ -178,13 +188,13 @@ func ResourceDeploymentCreate(ctx context.Context, d *schema.ResourceData, m int
	}

	req := &inference.CreateDeploymentRequest{
		Region:    region,
		ProjectID: d.Get("project_id").(string),
		Name:      d.Get("name").(string),
		NodeType:  d.Get("node_type").(string),
		ModelName: d.Get("model_name").(string),
		Tags:      types.ExpandStrings(d.Get("tags")),
		Endpoints: buildEndpoints(d),
		Region:       region,
		ProjectID:    d.Get("project_id").(string),
		Name:         d.Get("name").(string),
		NodeTypeName: d.Get("node_type").(string),
		ModelID:      locality.ExpandID(d.Get("model_id").(string)),
		Tags:         types.ExpandStrings(d.Get("tags")),
		Endpoints:    buildEndpoints(d),
	}

	if isAcceptingEula, ok := d.GetOk("accept_eula"); ok {
@@ -199,6 +209,12 @@
		req.MaxSize = scw.Uint32Ptr(uint32(maxSize.(int)))
	}

	if quantization, ok := d.GetOk("quantization"); ok {
		req.Quantization = &inference.DeploymentQuantization{
			Bits: uint32(quantization.(int)),
		}
	}

	deployment, err := api.CreateDeployment(req, scw.WithContext(ctx))
	if err != nil {
		return diag.FromErr(err)
@@ -221,8 +237,8 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec {
		publicEndpointMap := publicEndpoint.([]interface{})[0].(map[string]interface{})
		if publicEndpointMap["is_enabled"].(bool) {
			publicEp := inference.EndpointSpec{
				Public:      &inference.EndpointSpecPublic{},
				DisableAuth: publicEndpointMap["disable_auth"].(bool),
				PublicNetwork: &inference.EndpointPublicNetworkDetails{},
				DisableAuth:   publicEndpointMap["disable_auth"].(bool),
			}
			endpoints = append(endpoints, &publicEp)
		}
@@ -232,7 +248,7 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec {
		privateEndpointMap := privateEndpoint.([]interface{})[0].(map[string]interface{})
		if privateID, exists := privateEndpointMap["private_network_id"]; exists {
			privateEp := inference.EndpointSpec{
				PrivateNetwork: &inference.EndpointSpecPrivateNetwork{
				PrivateNetwork: &inference.EndpointPrivateNetworkDetails{
					PrivateNetworkID: regional.ExpandID(privateID.(string)).ID,
				},
				DisableAuth: privateEndpointMap["disable_auth"].(bool),
@@ -264,7 +280,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter
_ = d.Set("name", deployment.Name)
_ = d.Set("region", deployment.Region)
_ = d.Set("project_id", deployment.ProjectID)
_ = d.Set("node_type", deployment.NodeType)
_ = d.Set("node_type", deployment.NodeTypeName)
_ = d.Set("model_name", deployment.ModelName)
_ = d.Set("min_size", int(deployment.MinSize))
_ = d.Set("max_size", int(deployment.MaxSize))
@@ -290,7 +306,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter
			privateEndpoints = append(privateEndpoints, privateEndpointSpec)
		}

		if endpoint.PublicAccess != nil {
		if endpoint.PublicNetwork != nil {
			publicEndpointSpec := map[string]interface{}{
				"id":         endpoint.ID,
				"is_enabled": true,