Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions docs/resources/inference_custom_model.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
---
subcategory: "Inference"
page_title: "Scaleway: scaleway_inference_custom_model"
---

# Resource: scaleway_inference_custom_model

The scaleway_inference_custom_model resource allows you to upload and manage custom inference models in the Scaleway Inference ecosystem. Once registered, a custom model can be used in any scaleway_inference_deployment resource.

## Example Usage

### Basic

```terraform
resource "scaleway_inference_custom_model" "test" {
name = "my-awesome-model"
url = "https://huggingface.co/my-awesome-model"
secret = "my-secret-token"
}
```

### Deploy your own model on your managed inference

```terraform
resource "scaleway_inference_custom_model" "my_model" {
name = "my-awesome-model"
url = "https://huggingface.co/my-awesome-model"
secret = "my-secret-token"
}

resource "scaleway_inference_deployment" "my_deployment" {
name = "test-inference-deployment-basic"
node_type = "A100-80GB" # replace with your node type
model_id = scaleway_inference_custom_model.my_model.id

public_endpoint {
is_enabled = true
}

accept_eula = true
}
```

## Argument Reference

- `name` - (Required) The name of the custom model. This must be unique within the project.
- `url` - (Required) The HTTPS URL pointing to your model
- `secret` - (Optional, Sensitive) Secret used to authenticate when pulling the model from a private URL.
- `region` - (Defaults to [provider](../index.md#region) `region`) The [region](../guides/regions_and_zones.md#regions) in which the deployment is created.
- `project_id` - (Defaults to [provider](../index.md#project_id) `project_id`) The ID of the project the deployment is associated with.

## Attributes Reference

In addition to all arguments above, the following attributes are exported:

- `id` - The unique identifier of the custom model.
- `tags` - Tags associated with the model.
- `status` - The current status of the model (e.g., ready, error, etc.).
- `description` - A textual description of the model (if available).
- `has_eula` - Whether the model requires end-user license agreement acceptance before use.
- `parameter_size_bits` - Size, in bits, of the model parameters.
- `size_bytes` - Total size, in bytes, of the model archive.
- `nodes_support` - List of supported node types and their quantization options. Each entry contains:
- `node_type_name` - The type of node supported.
- `quantization` - A list of supported quantization options, including:
- `quantization_bits` - Number of bits used for quantization (e.g., 8, 16).
- `allowed` - Whether this quantization is allowed.
- `max_context_size` - Maximum context length supported by this quantization.

## Import

Custom models can be imported using `{region}/{id}`, as shown below:

```bash
terraform import scaleway_inference_custom_model.my_model fr-par/11111111-1111-1111-1111-111111111111
```
6 changes: 3 additions & 3 deletions docs/resources/inference_deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ For more information, see the [API documentation](https://www.scaleway.com/en/de
resource "scaleway_inference_deployment" "deployment" {
name = "tf-inference-deployment"
node_type = "L4"
model_name = "meta/llama-3.1-8b-instruct:fp8"
model_id = "d33fb5fd-75ca-4dfb-8952-8af8b8b28be5"
public_endpoint {
is_enabled = true
}
Expand All @@ -26,7 +26,7 @@ resource "scaleway_inference_deployment" "deployment" {

## Argument Reference

- `model_name` - (Required) The model name to use for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`)
- `model_id` - (Required) The model id used for the deployment.
- `node_type` - (Required) The node type to use for the deployment. Node types can be found using Scaleway's CLI (`scw inference node-type list`)
- `name` - (Optional) The deployment name.
- `accept_eula` - (Optional) Some models (e.g Meta Llama) require end-user license agreements. Set `true` to accept.
Expand All @@ -48,7 +48,7 @@ resource "scaleway_inference_deployment" "deployment" {
In addition to all arguments above, the following attributes are exported:

- `id` - The ID of the deployment.
- `model_id` - The model id used for the deployment.
- `model_name` - The model name used for the deployment. Model names can be found in Console or using Scaleway's CLI (`scw inference model list`)
- `size` - The size of the pool.
- `status` - The status of the deployment.
- `created_at` - The date and time of the creation of the deployment.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ require (
github.com/nats-io/jwt/v2 v2.7.3
github.com/nats-io/nats.go v1.38.0
github.com/robfig/cron/v3 v3.0.1
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250424152954-b4babe8f214c
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250425085959-ea0a849e0b26
github.com/stretchr/testify v1.10.0
golang.org/x/crypto v0.36.0
gopkg.in/dnaeon/go-vcr.v3 v3.2.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,8 @@ github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250424152954-b4babe8f214c h1:sjbNFhI3o5ecQuxLZv54Gm/YlqP55Ot5l7ShneWeNg8=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250424152954-b4babe8f214c/go.mod h1:w4o02EHpO0CBGy2nehzWRaFQKd62G9HIf+Q07PDaUcE=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250425085959-ea0a849e0b26 h1:6KJ16mZbrP/ahxkbJGTCjHdJJdCJF1Hfwnw92Q5sf3I=
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33.0.20250425085959-ea0a849e0b26/go.mod h1:w4o02EHpO0CBGy2nehzWRaFQKd62G9HIf+Q07PDaUcE=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
Expand Down
1 change: 1 addition & 0 deletions internal/provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ func Provider(config *Config) plugin.ProviderFunc {
"scaleway_iam_ssh_key": iam.ResourceSSKKey(),
"scaleway_iam_user": iam.ResourceUser(),
"scaleway_inference_deployment": inference.ResourceDeployment(),
"scaleway_inference_custom_model": inference.ResourceCustomModel(),
"scaleway_instance_image": instance.ResourceImage(),
"scaleway_instance_ip": instance.ResourceIP(),
"scaleway_instance_ip_reverse_dns": instance.ResourceIPReverseDNS(),
Expand Down
231 changes: 231 additions & 0 deletions internal/services/inference/custom_model.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
package inference

import (
"context"
"fmt"

"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"github.com/scaleway/scaleway-sdk-go/api/inference/v1"
"github.com/scaleway/scaleway-sdk-go/scw"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/httperrors"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/locality/regional"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/services/account"
"github.com/scaleway/terraform-provider-scaleway/v2/internal/types"
)

// ResourceCustomModel returns the schema for the scaleway_inference_custom_model
// resource. A custom model is uploaded from a user-provided URL and, once ready,
// can be referenced by a scaleway_inference_deployment through its ID.
//
// All user-settable arguments (name, url, secret) are ForceNew: the API has no
// update endpoint for these fields, so any change recreates the model. There is
// consequently no UpdateContext.
func ResourceCustomModel() *schema.Resource {
	return &schema.Resource{
		CreateContext: ResourceCustomModelCreate,
		ReadContext:   ResourceCustomModelRead,
		DeleteContext: ResourceCustomModelDelete,
		Importer: &schema.ResourceImporter{
			// Import uses the regional ID format "{region}/{uuid}".
			StateContext: schema.ImportStatePassthroughContext,
		},
		Timeouts: &schema.ResourceTimeout{
			Default: schema.DefaultTimeout(defaultCustomModelTimeout),
			Create:  schema.DefaultTimeout(defaultCustomModelTimeout),
			Update:  schema.DefaultTimeout(defaultCustomModelTimeout),
			Delete:  schema.DefaultTimeout(defaultCustomModelTimeout),
		},
		SchemaVersion: 0,
		Schema: map[string]*schema.Schema{
			"name": {
				Type:        schema.TypeString,
				Required:    true,
				ForceNew:    true,
				Description: "The name of the model",
			},
			"url": {
				Type:        schema.TypeString,
				Required:    true,
				ForceNew:    true,
				Description: "The URL of the model",
			},
			"secret": {
				Type:        schema.TypeString,
				Optional:    true,
				Sensitive:   true,
				ForceNew:    true,
				Description: "The secret to pull a model",
			},
			// Computed attributes below are populated from the API in Read.
			"tags": {
				Type:        schema.TypeList,
				Elem:        &schema.Schema{Type: schema.TypeString},
				Computed:    true,
				Description: "The tags associated with the deployment",
			},
			"project_id": account.ProjectIDSchema(),
			"status": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The status of the model",
			},
			"description": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The description of the model",
			},
			"created_at": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The date and time of the creation of the model",
			},
			"updated_at": {
				Type:        schema.TypeString,
				Computed:    true,
				Description: "The date and time of the last update of the model",
			},
			"has_eula": {
				Type:        schema.TypeBool,
				Computed:    true,
				Description: "Defines whether the model has an end user license agreement",
			},
			// nodes_support mirrors the SDK's ModelSupportInfo: a list of node
			// types, each with the quantization options allowed on it.
			"nodes_support": {
				Type:        schema.TypeList,
				Computed:    true,
				Description: "Supported node types with quantization options and context lengths.",
				Elem: &schema.Resource{
					Schema: map[string]*schema.Schema{
						"node_type_name": {
							Type:        schema.TypeString,
							Computed:    true,
							Description: "Supported node type.",
						},
						"quantization": {
							Type:        schema.TypeList,
							Computed:    true,
							Description: "Supported quantization options.",
							Elem: &schema.Resource{
								Schema: map[string]*schema.Schema{
									"quantization_bits": {
										Type:        schema.TypeInt,
										Computed:    true,
										Description: "Number of bits used for quantization.",
									},
									"allowed": {
										Type:        schema.TypeBool,
										Computed:    true,
										Description: "Whether this quantization is allowed for the model.",
									},
									"max_context_size": {
										Type:        schema.TypeInt,
										Computed:    true,
										Description: "Maximum inference context size for this quantization and node type.",
									},
								},
							},
						},
					},
				},
			},
			"parameter_size_bits": {
				Type:        schema.TypeInt,
				Computed:    true,
				Description: "Size, in bits, of the model parameters",
			},
			"size_bytes": {
				Type:        schema.TypeInt,
				Computed:    true,
				Description: "Total size, in bytes, of the model files",
			},
			"region": regional.Schema(),
		},
	}
}

// ResourceCustomModelCreate registers a custom model from its source URL, waits
// until the model leaves its transient states, and then delegates to Read to
// populate the computed attributes.
func ResourceCustomModelCreate(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
	api, region, err := NewAPIWithRegion(d, m)
	if err != nil {
		return diag.FromErr(err)
	}

	modelSource := &inference.ModelSource{
		URL: d.Get("url").(string),
	}

	// A secret is only required when pulling from a private source.
	if secret, ok := d.GetOk("secret"); ok {
		modelSource.Secret = types.ExpandStringPtr(secret)
	}

	reqCreateModel := &inference.CreateModelRequest{
		Region:    region,
		Name:      d.Get("name").(string),
		ProjectID: d.Get("project_id").(string),
		Source:    modelSource,
	}

	// Propagate ctx so the API call honors Terraform's cancellation/timeouts,
	// consistent with the Delete path.
	model, err := api.CreateModel(reqCreateModel, scw.WithContext(ctx))
	if err != nil {
		return diag.FromErr(err)
	}

	// Record the ID before waiting so a model stuck in a transient state is
	// still tracked in state and can be tainted/destroyed.
	d.SetId(regional.NewIDString(region, model.ID))

	model, err = waitForModel(ctx, api, region, model.ID, d.Timeout(schema.TimeoutCreate))
	if err != nil {
		return diag.FromErr(err)
	}

	if model.Status == inference.ModelStatusError {
		// ErrorMessage is a pointer and may be nil even in the error status;
		// guard the dereference to avoid a provider panic.
		errMsg := "unknown error"
		if model.ErrorMessage != nil {
			errMsg = *model.ErrorMessage
		}

		return diag.FromErr(fmt.Errorf("model '%s' is in status '%s': %s", model.ID, model.Status, errMsg))
	}

	return ResourceCustomModelRead(ctx, d, m)
}

// ResourceCustomModelRead refreshes the Terraform state from the API. It waits
// for the model to settle first so transient states are not persisted, and it
// drops the resource from state when the model no longer exists (404).
func ResourceCustomModelRead(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
	api, region, id, err := NewAPIWithRegionAndID(m, d.Id())
	if err != nil {
		return diag.FromErr(err)
	}

	model, err := waitForModel(ctx, api, region, id, d.Timeout(schema.TimeoutRead))
	if err != nil {
		if httperrors.Is404(err) {
			d.SetId("")

			return nil
		}

		return diag.FromErr(err)
	}

	// region and project_id must be written back so `terraform import` (which
	// only receives the regional ID) produces a complete state.
	_ = d.Set("region", region)
	_ = d.Set("project_id", model.ProjectID)
	_ = d.Set("name", model.Name)
	_ = d.Set("status", model.Status.String())
	_ = d.Set("description", model.Description)
	_ = d.Set("tags", model.Tags)
	_ = d.Set("created_at", types.FlattenTime(model.CreatedAt))
	_ = d.Set("updated_at", types.FlattenTime(model.UpdatedAt))
	_ = d.Set("has_eula", model.HasEula)
	// schema.TypeInt is backed by int, so convert explicitly.
	_ = d.Set("parameter_size_bits", int(model.ParameterSizeBits))
	_ = d.Set("size_bytes", int(model.SizeBytes))
	_ = d.Set("nodes_support", flattenNodeSupport(model.NodesSupport))

	// NOTE: "url" and "secret" are not returned by the API and are therefore
	// left untouched in state.
	return nil
}

// ResourceCustomModelDelete removes a custom model. It first waits for any
// in-flight transition to finish, then issues the delete; a 404 from the API
// is treated as success since the model is already gone.
func ResourceCustomModelDelete(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics {
	api, region, modelID, err := NewAPIWithRegionAndID(m, d.Id())
	if err != nil {
		return diag.FromErr(err)
	}

	// Let any pending state transition settle before attempting deletion.
	if _, waitErr := waitForModel(ctx, api, region, modelID, d.Timeout(schema.TimeoutDelete)); waitErr != nil {
		return diag.FromErr(waitErr)
	}

	deleteReq := &inference.DeleteModelRequest{
		Region:  region,
		ModelID: modelID,
	}

	deleteErr := api.DeleteModel(deleteReq, scw.WithContext(ctx))
	if deleteErr != nil && !httperrors.Is404(deleteErr) {
		return diag.FromErr(deleteErr)
	}

	return nil
}
Loading
Loading