Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions docs/resources/inference_custom_model.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_custom_model"
---

# Resource: scaleway_inference_custom_model

The scaleway_inference_custom_model resource allows you to upload and manage custom inference models in the Scaleway Inference ecosystem. Once registered, a custom model can be used in any scaleway_inference_deployment resource.

## Example Usage

### Basic

```terraform
resource "scaleway_inference_custom_model" "test" {
name = "my-awesome-model"
url = "https://huggingface.co/my-awesome-model"
secret = "my-secret-token"
}
```

### Deploy your own model on your managed inference

```terraform
resource "scaleway_inference_custom_model" "my_model" {
name = "my-awesome-model"
url = "https://huggingface.co/my-awesome-model"
secret = "my-secret-token"
}

resource "scaleway_inference_deployment" "my_deployment" {
name = "test-inference-deployment-basic"
node_type = "A100-80GB" # replace with your node type
model_id = scaleway_inference_custom_model.my_model.id

public_endpoint {
is_enabled = true
}

accept_eula = true
}
```

## Argument Reference

- `name` - (Required) The name of the custom model. This must be unique within the project.
- `url` - (Required) The HTTPS URL pointing to your model
- `secret` - (Optional, Sensitive) Secret used to authenticate when pulling the model from a private URL.
- `region` - (Defaults to [provider](../index.md#region) `region`) The [region](../guides/regions_and_zones.md#regions) in which the deployment is created.
- `project_id` - (Defaults to [provider](../index.md#project_id) `project_id`) The ID of the project the deployment is associated with.

## Attributes Reference

In addition to all arguments above, the following attributes are exported:

- `id` - The unique identifier of the custom model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., ready, error, etc.).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
- `node_type_name` - The type of node supported.
- `quantization` - A list of supported quantization options, including:
- `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
- `allowed` - Whether this quantization is allowed.
- `max_context_size` - Maximum context length supported by this quantization.

## Import

Custom models can be imported using `{region}/{id}`, as shown below:

```bash
terraform import scaleway_inference_custom_model.my_model fr-par/11111111-1111-1111-1111-111111111111
```
6 changes: 3 additions & 3 deletions docs/resources/inference_deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ For more information, see the [API documentation](https://www.scaleway.com/en/de
resource "scaleway_inference_deployment" "deployment" {
name = "tf-inference-deployment"
node_type = "L4"
model_name = "meta/llama-3.1-8b-instruct:fp8"
model_id = "d33fb5fd-75ca-4dfb-8952-8af8b8b28be5"
public_endpoint {
is_enabled = true
}
Expand All @@ -26,7 +26,7 @@ resource "scaleway_inference_deployment" "deployment" {

## Argument Reference

- `model_name` - (Required) The model name to use for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`)
- `model_id` - (Required) The model id used for the deployment.
- `node_type` - (Required) The node type to use for the deployment. Node types can be found using Scaleway's CLI (`scw inference node-type list`)
- `name` - (Optional) The deployment name.
- `accept_eula` - (Optional) Some models (e.g Meta Llama) require end-user license agreements. Set `true` to accept.
Expand All @@ -48,7 +48,7 @@ resource "scaleway_inference_deployment" "deployment" {
In addition to all arguments above, the following attributes are exported:

- `id` - The ID of the deployment.
- `model_id` - The model id used for the deployment.
- `model_name` - The model name used for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`)
- `size` - The size of the pool.
- `status` - The status of the deployment.
- `created_at` - The date and time of the creation of the deployment.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ require (
github.com/nats-io/jwt/v2 v2.7.3
github.com/nats-io/nats.go v1.38.0
github.com/robfig/cron/v3 v3.0.1
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250424152954-b4babe8f214c
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250425085959-ea0a849e0b26
github.com/stretchr/testify v1.10.0
golang.org/x/crypto v0.36.0
gopkg.in/dnaeon/go-vcr.v3 v3.2.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,8 @@ github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250424152954-b4babe8f214c h1:sjbNFhI3o5ecQuxLZv54Gm/YlqP55Ot5l7ShneWeNg8=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250424152954-b4babe8f214c/go.mod h1:w4o02EHpO0CBGy2nehzWRaFQKd62G9HIf+Q07PDaUcE=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250425085959-ea0a849e0b26 h1:6KJ16mZbrP/ahxkbJGTCjHdJJdCJF1Hfwnw92Q5sf3I=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250425085959-ea0a849e0b26/go.mod h1:w4o02EHpO0CBGy2nehzWRaFQKd62G9HIf+Q07PDaUcE=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
Expand Down
1 change: 1 addition & 0 deletions internal/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ func Provider(config *Config) plugin.ProviderFunc {
"scaleway_iam_ssh_key": iam.ResourceSSKKey(),
"scaleway_iam_user": iam.ResourceUser(),
"scaleway_inference_deployment": inference.ResourceDeployment(),
"scaleway_inference_custom_model": inference.ResourceCustomModel(),
"scaleway_instance_image": instance.ResourceImage(),
"scaleway_instance_ip": instance.ResourceIP(),
"scaleway_instance_ip_reverse_dns": instance.ResourceIPReverseDNS(),
Expand Down
231 changes: 231 additions & 0 deletions internal/services/inference/custom_model.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
package inference

import (
"context"
"fmt"

"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
"github.com/scaleway/scaleway-sdk-go/scw"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/types"
)

// ResourceCustomModel returns the schema for the scaleway_inference_custom_model
// resource. A custom model is uploaded from a user-provided URL and, once ready,
// can be referenced by a scaleway_inference_deployment through its ID.
//
// All user-settable arguments (name, url, secret) are ForceNew: the API has no
// update endpoint for these fields, so any change recreates the model. There is
// consequently no UpdateContext.
func ResourceCustomModel() *schema.Resource {
	return &schema.Resource{
		CreateContext: ResourceCustomModelCreate,
		ReadContext:   ResourceCustomModelRead,
		DeleteContext: ResourceCustomModelDelete,
		Importer: &schema.ResourceImporter{
			// Import uses the regional ID format "{region}/{uuid}".
			StateContext: schema.ImportStatePassthroughContext,
		},
		Timeouts: &schema.ResourceTimeout{
			Default: schema.DefaultTimeout(defaultCustomModelTimeout),
			Create:  schema.DefaultTimeout(defaultCustomModelTimeout),
			Update:  schema.DefaultTimeout(defaultCustomModelTimeout),
			Delete:  schema.DefaultTimeout(defaultCustomModelTimeout),
		},
		SchemaVersion: 0,
		Schema: map[string]*schema.Schema{
			"name": {
				Type:        schema.TypeString,
				Required:    true,
				ForceNew:    true,
				Description: "The name of the model",
			},
			"url": {
				Type:        schema.TypeString,
				Required:    true,
				ForceNew:    true,
				Description: "The URL of the model",
			},
			"secret": {
				Type:        schema.TypeString,
				Optional:    true,
				Sensitive:   true,
				ForceNew:    true,
				Description: "The secret to pull a model",
			},
			// Computed attributes below are populated from the API in Read.
			"tags": {
				Type:        schema.TypeList,
				Elem:        &schema.Schema{Type: schema.TypeString},
				Computed:    true,
				Description: "The tags associated with the deployment",
			},
			"project_id": account.ProjectIDSchema(),
			"status": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The status of the model",
			},
			"description": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The description of the model",
			},
			"created_at": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The date and time of the creation of the model",
			},
			"updated_at": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The date and time of the last update of the model",
			},
			"has_eula": {
				Type:        schema.TypeBool,
				Computed:    true,
				Description: "Defines whether the model has an end user license agreement",
			},
			// nodes_support mirrors the SDK's ModelSupportInfo: a list of node
			// types, each with the quantization options allowed on it.
			"nodes_support": {
				Type:        schema.TypeList,
				Computed:    true,
				Description: "Supported node types with quantization options and context lengths.",
				Elem: &schema.Resource{
					Schema: map[string]*schema.Schema{
						"node_type_name": {
							Type:        schema.TypeString,
							Computed:    true,
							Description: "Supported node type.",
						},
						"quantization": {
							Type:        schema.TypeList,
							Computed:    true,
							Description: "Supported quantization options.",
							Elem: &schema.Resource{
								Schema: map[string]*schema.Schema{
									"quantization_bits": {
										Type:        schema.TypeInt,
										Computed:    true,
										Description: "Number of bits used for quantization.",
									},
									"allowed": {
										Type:        schema.TypeBool,
										Computed:    true,
										Description: "Whether this quantization is allowed for the model.",
									},
									"max_context_size": {
										Type:        schema.TypeInt,
										Computed:    true,
										Description: "Maximum inference context size for this quantization and node type.",
									},
								},
							},
						},
					},
				},
			},
			"parameter_size_bits": {
				Type:        schema.TypeInt,
				Computed:    true,
				Description: "Size, in bits, of the model parameters",
			},
			"size_bytes": {
				Type:        schema.TypeInt,
				Computed:    true,
				Description: "Total size, in bytes, of the model files",
			},
			"region": regional.Schema(),
		},
	}
}

// ResourceCustomModelCreate registers a custom model from its source URL, waits
// until the model leaves its transient states, and then delegates to Read to
// populate the computed attributes.
func ResourceCustomModelCreate(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
	api, region, err := NewAPIWithRegion(d, m)
	if err != nil {
		return diag.FromErr(err)
	}

	modelSource := &inference.ModelSource{
		URL: d.Get("url").(string),
	}

	// A secret is only required when pulling from a private source.
	if secret, ok := d.GetOk("secret"); ok {
		modelSource.Secret = types.ExpandStringPtr(secret)
	}

	reqCreateModel := &inference.CreateModelRequest{
		Region:    region,
		Name:      d.Get("name").(string),
		ProjectID: d.Get("project_id").(string),
		Source:    modelSource,
	}

	// Propagate ctx so the API call honors Terraform's cancellation/timeouts,
	// consistent with the Delete path.
	model, err := api.CreateModel(reqCreateModel, scw.WithContext(ctx))
	if err != nil {
		return diag.FromErr(err)
	}

	// Record the ID before waiting so a model stuck in a transient state is
	// still tracked in state and can be tainted/destroyed.
	d.SetId(regional.NewIDString(region, model.ID))

	model, err = waitForModel(ctx, api, region, model.ID, d.Timeout(schema.TimeoutCreate))
	if err != nil {
		return diag.FromErr(err)
	}

	if model.Status == inference.ModelStatusError {
		// ErrorMessage is a pointer and may be nil even in the error status;
		// guard the dereference to avoid a provider panic.
		errMsg := "unknown error"
		if model.ErrorMessage != nil {
			errMsg = *model.ErrorMessage
		}

		return diag.FromErr(fmt.Errorf("model '%s' is in status '%s': %s", model.ID, model.Status, errMsg))
	}

	return ResourceCustomModelRead(ctx, d, m)
}

// ResourceCustomModelRead refreshes the Terraform state from the API. It waits
// for the model to settle first so transient states are not persisted, and it
// drops the resource from state when the model no longer exists (404).
func ResourceCustomModelRead(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
	api, region, id, err := NewAPIWithRegionAndID(m, d.Id())
	if err != nil {
		return diag.FromErr(err)
	}

	model, err := waitForModel(ctx, api, region, id, d.Timeout(schema.TimeoutRead))
	if err != nil {
		if httperrors.Is404(err) {
			d.SetId("")

			return nil
		}

		return diag.FromErr(err)
	}

	// region and project_id must be written back so `terraform import` (which
	// only receives the regional ID) produces a complete state.
	_ = d.Set("region", region)
	_ = d.Set("project_id", model.ProjectID)
	_ = d.Set("name", model.Name)
	_ = d.Set("status", model.Status.String())
	_ = d.Set("description", model.Description)
	_ = d.Set("tags", model.Tags)
	_ = d.Set("created_at", types.FlattenTime(model.CreatedAt))
	_ = d.Set("updated_at", types.FlattenTime(model.UpdatedAt))
	_ = d.Set("has_eula", model.HasEula)
	// schema.TypeInt is backed by int, so convert explicitly.
	_ = d.Set("parameter_size_bits", int(model.ParameterSizeBits))
	_ = d.Set("size_bytes", int(model.SizeBytes))
	_ = d.Set("nodes_support", flattenNodeSupport(model.NodesSupport))

	// NOTE: "url" and "secret" are not returned by the API and are therefore
	// left untouched in state.
	return nil
}

// ResourceCustomModelDelete removes a custom model. It first waits for any
// in-flight transition to finish, then issues the delete; a 404 from the API
// is treated as success since the model is already gone.
func ResourceCustomModelDelete(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
	api, region, modelID, err := NewAPIWithRegionAndID(m, d.Id())
	if err != nil {
		return diag.FromErr(err)
	}

	// Let any pending state transition settle before attempting deletion.
	if _, waitErr := waitForModel(ctx, api, region, modelID, d.Timeout(schema.TimeoutDelete)); waitErr != nil {
		return diag.FromErr(waitErr)
	}

	deleteReq := &inference.DeleteModelRequest{
		Region:  region,
		ModelID: modelID,
	}

	deleteErr := api.DeleteModel(deleteReq, scw.WithContext(ctx))
	if deleteErr != nil && !httperrors.Is404(deleteErr) {
		return diag.FromErr(deleteErr)
	}

	return nil
}
Loading
Loading