Merged
35 commits
dae9700
feat(inference): migration deployment to v1
Laure-di Apr 24, 2025
3359869
feat(inference): BYOM support
Laure-di Apr 25, 2025
e401f22
add sweeper
Laure-di Apr 25, 2025
8bc4617
manage id from regional or not
Laure-di Apr 25, 2025
533f8e8
remove error_message
Laure-di Apr 25, 2025
7512e43
fix linter
Laure-di Apr 25, 2025
4f129e4
last cassette
Laure-di Apr 28, 2025
9beb5b4
fix documentation
Laure-di Apr 28, 2025
40a9392
fix documentation lint
Laure-di Apr 28, 2025
c910960
remove comment
Laure-di Apr 28, 2025
c53fd23
Update docs/resources/inference_custom_model.md
Laure-di Apr 28, 2025
54e9077
change model_id format
Laure-di Apr 28, 2025
7233d94
use of dsf.locality
Laure-di Apr 29, 2025
dbb2b2e
ResourceCustomModelDelete return right err and testAccCheckCustomMode…
Laure-di Apr 29, 2025
cadc3f2
fix(doc): add import part and fix typo
Laure-di Apr 29, 2025
91113f7
fix(doc): deployment required attribute
Laure-di Apr 29, 2025
b02faf7
fix(inference): use of existing function cast
Laure-di Apr 29, 2025
8c62809
Update docs/resources/inference_custom_model.md
Laure-di Apr 29, 2025
2c94ded
skip tests until further notice
Laure-di Apr 29, 2025
17b9663
activate tests
Laure-di Apr 30, 2025
fae6676
fix(inference): rename resource from custom_model to model
Laure-di Apr 30, 2025
92f7cfd
update sdk-go
Laure-di Apr 30, 2025
c95dc22
remove unecessary file
Laure-di Apr 30, 2025
8755556
fix(doc): put real URL and more context
Laure-di Apr 30, 2025
f890be1
add support model data-source
Laure-di May 12, 2025
d2c516c
testing
Laure-di May 14, 2025
c6b5ec7
add test
Laure-di May 14, 2025
7ac52b7
update doc and tests
Laure-di May 15, 2025
1f93d2e
fix linter
Laure-di May 15, 2025
fc80ab4
fix linter
Laure-di May 15, 2025
aa15cf5
remove custom reference
Laure-di May 15, 2025
f6fa71d
update cassette
Laure-di May 15, 2025
715ed4e
update cassette deployment with datasource
Laure-di May 15, 2025
cfaa353
Merge branch 'master' into migration-inference-v1
remyleone May 15, 2025
2720cd0
Merge branch 'master' into migration-inference-v1
remyleone May 15, 2025
45 changes: 45 additions & 0 deletions docs/data-sources/inference_model.md
@@ -0,0 +1,45 @@
---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_model"
---

# scaleway_inference_model

The `scaleway_inference_model` data source allows you to retrieve information about an inference model available in the Scaleway Inference API, either by providing the model's `name` or its `model_id`.

## Example Usage

### Basic

```hcl
data "scaleway_inference_model" "my_model" {
name = "meta/llama-3.1-8b-instruct:fp8"
}
```
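
### Use model ID

Alternatively, look up the model by its ID. A minimal sketch; the ID below is a placeholder to replace with a real, locality-prefixed model ID:

```hcl
data "scaleway_inference_model" "my_model_by_id" {
  model_id = "fr-par/11111111-1111-1111-1111-111111111111" # placeholder model ID
}
```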

## Argument Reference

You must provide either `name` or `model_id`, but not both.

- `name` (Optional, conflicts with `model_id`) The fully qualified name of the model to look up (e.g., `meta/llama-3.1-8b-instruct:fp8`). The provider will search for a model with an exact name match in the selected region and project.
- `model_id` (Optional, conflicts with `name`) The ID of the model to retrieve. Must be a valid UUID with its locality prefix (e.g., `fr-par/11111111-1111-1111-1111-111111111111`).
- `project_id` (Optional) The project ID to use when listing models. If not provided, the provider default project is used.
- `region` (Optional) The region where the model is hosted. If not set, the provider default region is used.

## Attributes Reference

In addition to the input arguments above, the following attributes are exported:

- `id` - The unique identifier of the model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., `ready`, `error`).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
- `node_type_name` - The type of node supported.
- `quantization` - A list of supported quantization options, including:
- `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
- `allowed` - Whether this quantization is allowed.
- `max_context_size` - Maximum context length supported by this quantization.
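
As an illustration of consuming the nested `nodes_support` attribute, here is a minimal sketch (it assumes the `my_model` data source from the example above):

```hcl
output "supported_node_types" {
  # Node types this model can be deployed on
  value = [for ns in data.scaleway_inference_model.my_model.nodes_support : ns.node_type_name]
}
```
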
10 changes: 7 additions & 3 deletions docs/resources/inference_deployment.md
@@ -13,10 +13,14 @@ For more information, see the [API documentation](https://www.scaleway.com/en/de
### Basic

```terraform
data "scaleway_inference_model" "my_model" {
name = "meta/llama-3.1-8b-instruct:fp8"
}

resource "scaleway_inference_deployment" "deployment" {
name = "tf-inference-deployment"
node_type = "L4"
model_name = "meta/llama-3.1-8b-instruct:fp8"
model_name = data.scaleway_inference_model.my_model.id
public_endpoint {
is_enabled = true
}
@@ -26,7 +30,7 @@ resource "scaleway_inference_deployment" "deployment" {

## Argument Reference

- `model_name` - (Required) The model name to use for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`)
- `model_id` - (Required) The ID of the model to use for the deployment. Model IDs can be retrieved from the `scaleway_inference_model` data source or with Scaleway's CLI (`scw inference model list`).
- `node_type` - (Required) The node type to use for the deployment. Node types can be found using Scaleway's CLI (`scw inference node-type list`).
- `name` - (Optional) The deployment name.
- `accept_eula` - (Optional) Some models (e.g., Meta Llama) require end-user license agreements. Set `true` to accept.
@@ -48,7 +52,7 @@ resource "scaleway_inference_deployment" "deployment" {
In addition to all arguments above, the following attributes are exported:

- `id` - The ID of the deployment.
- `model_id` - The model id used for the deployment.
- `model_name` - The model name used for the deployment. Model names can be found in the Console or using Scaleway's CLI (`scw inference model list`).
- `size` - The size of the pool.
- `status` - The status of the deployment.
- `created_at` - The date and time of the creation of the deployment.
76 changes: 76 additions & 0 deletions docs/resources/inference_model.md
@@ -0,0 +1,76 @@
---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_model"
---

# Resource: scaleway_inference_model

The `scaleway_inference_model` resource allows you to upload and manage inference models in the Scaleway Inference ecosystem. Once registered, a model can be used in any `scaleway_inference_deployment` resource.

## Example Usage

### Basic

```terraform
resource "scaleway_inference_model" "test" {
name = "my-awesome-model"
url = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
secret = "my-secret-token"
}
```

### Deploy your own model on Managed Inference

```terraform
resource "scaleway_inference_model" "my_model" {
name = "my-awesome-model"
url = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
secret = "my-secret-token"
}

resource "scaleway_inference_deployment" "my_deployment" {
name = "test-inference-deployment-basic"
node_type = "H100" # replace with your node type
model_id = scaleway_inference_model.my_model.id

public_endpoint {
is_enabled = true
}

accept_eula = true
}
```
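
### Using a sensitive variable for the secret

To keep credentials out of your configuration files, the token can be supplied through a sensitive variable rather than a literal. A sketch, with an illustrative variable name:

```terraform
variable "hf_token" {
  type        = string
  sensitive   = true
  description = "Access token used to pull the model (illustrative variable)"
}

resource "scaleway_inference_model" "private_model" {
  name   = "my-awesome-model"
  url    = "https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
  secret = var.hf_token
}
```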

## Argument Reference

- `name` - (Required) The name of the model. This must be unique within the project.
- `url` - (Required) The HTTPS source URL from which the model will be downloaded. This is typically a Hugging Face repository URL (e.g., `https://huggingface.co/agentica-org/DeepCoder-14B-Preview`). The URL must be publicly accessible or require valid credentials provided via `secret`.
- `secret` - (Optional, Sensitive) Authentication token used to pull the model from a private or gated URL (e.g., a Hugging Face access token with read permission).
- `region` - (Defaults to [provider](../index.md#region) `region`) The [region](../guides/regions_and_zones.md#regions) in which the model is created.
- `project_id` - (Defaults to [provider](../index.md#project_id) `project_id`) The ID of the project the model is associated with.

## Attributes Reference

In addition to all arguments above, the following attributes are exported:

- `id` - The unique identifier of the model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., `ready`, `error`).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
- `node_type_name` - The type of node supported.
- `quantization` - A list of supported quantization options, including:
- `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
- `allowed` - Whether this quantization is allowed.
- `max_context_size` - Maximum context length supported by this quantization.

## Import

Models can be imported using `{region}/{id}`, as shown below:

```bash
terraform import scaleway_inference_model.my_model fr-par/11111111-1111-1111-1111-111111111111
```
2 changes: 2 additions & 0 deletions internal/provider/provider.go
@@ -167,6 +167,7 @@ func Provider(config *Config) plugin.ProviderFunc {
"scaleway_iam_ssh_key": iam.ResourceSSKKey(),
"scaleway_iam_user": iam.ResourceUser(),
"scaleway_inference_deployment": inference.ResourceDeployment(),
"scaleway_inference_model": inference.ResourceModel(),
"scaleway_instance_image": instance.ResourceImage(),
"scaleway_instance_ip": instance.ResourceIP(),
"scaleway_instance_ip_reverse_dns": instance.ResourceIPReverseDNS(),
@@ -273,6 +274,7 @@ func Provider(config *Config) plugin.ProviderFunc {
"scaleway_iam_ssh_key": iam.DataSourceSSHKey(),
"scaleway_iam_user": iam.DataSourceUser(),
"scaleway_iam_api_key": iam.DataSourceAPIKey(),
"scaleway_inference_model": inference.DataSourceModel(),
"scaleway_instance_image": instance.DataSourceImage(),
"scaleway_instance_ip": instance.DataSourceIP(),
"scaleway_instance_placement_group": instance.DataSourcePlacementGroup(),
54 changes: 35 additions & 19 deletions internal/services/inference/deployment.go
@@ -6,9 +6,11 @@ import (
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
inference "github.com/scaleway/scaleway-sdk-go/api/inference/v1beta1"
"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
"github.com/scaleway/scaleway-sdk-go/scw"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/dsf"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/types"
@@ -43,17 +45,20 @@ func ResourceDeployment() *schema.Resource {
"node_type": {
Type: schema.TypeString,
Required: true,
ForceNew: true,
Description: "The node type to use for the deployment",
},
"model_name": {
Type: schema.TypeString,
Required: true,
Computed: true,
Description: "The model name to use for the deployment",
},
"model_id": {
Type: schema.TypeString,
Computed: true,
Description: "The model id used for the deployment",
Type: schema.TypeString,
Required: true,
Description: "The model id used for the deployment",
ForceNew: true,
DiffSuppressFunc: dsf.Locality,
},
"accept_eula": {
Type: schema.TypeBool,
@@ -70,16 +75,21 @@
"min_size": {
Type: schema.TypeInt,
Optional: true,
Computed: true,
Description: "The minimum size of the pool",
ValidateFunc: validation.IntAtLeast(1),
Default: 1,
},
"max_size": {
Type: schema.TypeInt,
Optional: true,
Computed: true,
Description: "The maximum size of the pool",
ValidateFunc: validation.IntAtLeast(1),
Default: 1,
},
"quantization": {
Type: schema.TypeInt,
Optional: true,
Description: "The number of bits each model parameter should be quantized to",
},
"size": {
Type: schema.TypeInt,
@@ -178,13 +188,13 @@ func ResourceDeploymentCreate(ctx context.Context, d *schema.ResourceData, m int
	}

	req := &inference.CreateDeploymentRequest{
		Region:    region,
		ProjectID: d.Get("project_id").(string),
		Name:      d.Get("name").(string),
		NodeType:  d.Get("node_type").(string),
		ModelName: d.Get("model_name").(string),
		Tags:      types.ExpandStrings(d.Get("tags")),
		Endpoints: buildEndpoints(d),
		Region:       region,
		ProjectID:    d.Get("project_id").(string),
		Name:         d.Get("name").(string),
		NodeTypeName: d.Get("node_type").(string),
		ModelID:      locality.ExpandID(d.Get("model_id").(string)),
		Tags:         types.ExpandStrings(d.Get("tags")),
		Endpoints:    buildEndpoints(d),
	}

	if isAcceptingEula, ok := d.GetOk("accept_eula"); ok {
@@ -199,6 +209,12 @@
		req.MaxSize = scw.Uint32Ptr(uint32(maxSize.(int)))
	}

	if quantization, ok := d.GetOk("quantization"); ok {
		req.Quantization = &inference.DeploymentQuantization{
			Bits: uint32(quantization.(int)),
		}
	}

	deployment, err := api.CreateDeployment(req, scw.WithContext(ctx))
	if err != nil {
		return diag.FromErr(err)
@@ -221,8 +237,8 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec {
		publicEndpointMap := publicEndpoint.([]interface{})[0].(map[string]interface{})
		if publicEndpointMap["is_enabled"].(bool) {
			publicEp := inference.EndpointSpec{
				Public:      &inference.EndpointSpecPublic{},
				DisableAuth: publicEndpointMap["disable_auth"].(bool),
				PublicNetwork: &inference.EndpointPublicNetworkDetails{},
				DisableAuth:   publicEndpointMap["disable_auth"].(bool),
			}
			endpoints = append(endpoints, &publicEp)
		}
@@ -232,7 +248,7 @@ func buildEndpoints(d *schema.ResourceData) []*inference.EndpointSpec {
		privateEndpointMap := privateEndpoint.([]interface{})[0].(map[string]interface{})
		if privateID, exists := privateEndpointMap["private_network_id"]; exists {
			privateEp := inference.EndpointSpec{
				PrivateNetwork: &inference.EndpointSpecPrivateNetwork{
				PrivateNetwork: &inference.EndpointPrivateNetworkDetails{
					PrivateNetworkID: regional.ExpandID(privateID.(string)).ID,
				},
				DisableAuth: privateEndpointMap["disable_auth"].(bool),
@@ -264,7 +280,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter
_ = d.Set("name", deployment.Name)
_ = d.Set("region", deployment.Region)
_ = d.Set("project_id", deployment.ProjectID)
_ = d.Set("node_type", deployment.NodeType)
_ = d.Set("node_type", deployment.NodeTypeName)
_ = d.Set("model_name", deployment.ModelName)
_ = d.Set("min_size", int(deployment.MinSize))
_ = d.Set("max_size", int(deployment.MaxSize))
@@ -290,7 +306,7 @@ func ResourceDeploymentRead(ctx context.Context, d *schema.ResourceData, m inter
			privateEndpoints = append(privateEndpoints, privateEndpointSpec)
		}

		if endpoint.PublicAccess != nil {
		if endpoint.PublicNetwork != nil {
			publicEndpointSpec := map[string]interface{}{
				"id":         endpoint.ID,
				"is_enabled": true,